/*	$NetBSD: subr_extent.c,v 1.58 2006/04/15 05:07:15 christos Exp $	*/

/*-
 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_extent.c,v 1.58 2006/04/15 05:07:15 christos Exp $");

#ifdef _KERNEL
#include "opt_lockdebug.h"

#include <sys/param.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>

#include <uvm/uvm_extern.h>

#define	KMEM_IS_RUNNING		(kmem_map != NULL)
#elif defined(_EXTENT_TESTING)
/*
 * User-land definitions, so it can fit into a testing harness.
 */
#include <sys/param.h>
#include <sys/pool.h>
#include <sys/extent.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Use multi-line #defines to avoid screwing up the kernel tags file;
 * without this, ctags produces a tags file where panic() shows up
 * in subr_extent.c rather than subr_prf.c.
 */
#define \
malloc(s, t, flags)		malloc(s)
#define \
free(p, t)			free(p)
#define \
tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
#define \
ltsleep(chan, pri, str, timo, lck)	(EWOULDBLOCK)
#define \
wakeup(chan)			((void)0)
#define \
pool_get(pool, flags)		malloc((pool)->pr_size, 0, 0)
#define \
pool_put(pool, rp)		free(rp, 0)
#define \
panic(a)			printf(a)
#define \
splhigh()			(1)
#define \
splx(s)				((void)(s))

#define \
simple_lock_init(l)		((void)(l))
#define \
simple_lock(l)			((void)(l))
#define \
simple_unlock(l)		((void)(l))
#define	KMEM_IS_RUNNING		(1)
#endif

static struct pool expool;
static struct simplelock expool_init_slock = SIMPLELOCK_INITIALIZER;
static int expool_initialized;

/*
 * Macro to align to an arbitrary power-of-two boundary.
 */
#define	EXTENT_ALIGN(_start, _align, _skew)		\
	(((((_start) - (_skew)) + ((_align) - 1)) & (-(_align))) + (_skew))
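
/*
 * A worked example of the alignment arithmetic above (illustrative
 * only; the values are hypothetical).  With _align = 0x10 and no skew,
 *
 *	EXTENT_ALIGN(0x1003, 0x10, 0) == 0x1010
 *
 * since (0x1003 + 0xf) & ~0xf rounds up to the next 16-byte boundary.
 * A non-zero skew shifts the lattice of acceptable addresses, so with
 * _skew = 4 the result is congruent to 4 modulo the alignment:
 *
 *	EXTENT_ALIGN(0x1003, 0x10, 4) == 0x1004
 *
 * Note that (-(_align)) is the two's-complement equivalent of the bit
 * mask ~((_align) - 1), so the macro only works for power-of-two
 * alignments.
 */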
/*
 * Create the extent_region pool.
 * (This is deferred until one of our callers thinks we can malloc()).
 */
static inline void
expool_init(void)
{

	simple_lock(&expool_init_slock);
	if (expool_initialized) {
		simple_unlock(&expool_init_slock);
		return;
	}

#if defined(_KERNEL)
	pool_init(&expool, sizeof(struct extent_region), 0, 0, 0,
	    "extent", NULL);
#else
	expool.pr_size = sizeof(struct extent_region);
#endif

	expool_initialized = 1;
	simple_unlock(&expool_init_slock);
}

/*
 * Allocate an extent region descriptor.  EXTENT MUST NOT BE LOCKED,
 * AS THIS FUNCTION MAY BLOCK!  We will handle any locking we may need.
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;
	int exflags;
	int s;

	/*
	 * If the kernel memory allocator is not yet running, we can't
	 * use it (obviously).
	 */
	if (KMEM_IS_RUNNING == 0)
		flags &= ~EX_MALLOCOK;

	/*
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	simple_lock(&ex->ex_slock);
	exflags = ex->ex_flags;
	simple_unlock(&ex->ex_slock);

	if (exflags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		for (;;) {
			simple_lock(&ex->ex_slock);
			if ((rp = LIST_FIRST(&fex->fex_freelist)) != NULL) {
				/*
				 * Don't muck with flags after pulling it off
				 * the freelist; it may have been dynamically
				 * allocated, and kindly given to us.  We
				 * need to remember that information.
				 */
				LIST_REMOVE(rp, er_link);
				simple_unlock(&ex->ex_slock);
				return (rp);
			}
			if (flags & EX_MALLOCOK) {
				simple_unlock(&ex->ex_slock);
				goto alloc;
			}
			if ((flags & EX_WAITOK) == 0) {
				simple_unlock(&ex->ex_slock);
				return (NULL);
			}
			ex->ex_flags |= EXF_FLWANTED;
			if (ltsleep(&fex->fex_freelist,
			    PNORELOCK | PRIBIO |
			    ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0, &ex->ex_slock))
				return (NULL);
		}
	}

 alloc:
	s = splhigh();
	if (expool_initialized == 0)
		expool_init();
	rp = pool_get(&expool, (flags & EX_WAITOK) ? PR_WAITOK : 0);
	splx(s);

	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

/*
 * Free an extent region descriptor.  EXTENT _MUST_ BE LOCKED!  This
 * is safe as we do not block here.
 */
static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	int s;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just freeing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				s = splhigh();
				pool_put(&expool, rp);
				splx(s);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	s = splhigh();
	pool_put(&expool, rp);
	splx(s);
}

/*
 * Allocate and initialize an extent map.
 */
struct extent *
extent_create(const char *name, u_long start, u_long end,
    struct malloc_type *mtype, caddr_t storage, size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);
	int s;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("extent_create: name == NULL");
	if (end < start) {
		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
		    name, start, end);
		panic("extent_create: end < start");
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("extent_create: fixed extent, bad storagesize 0x%lx",
		    (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("extent_create: storage provided for non-fixed");
#endif

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		s = splhigh();
		if (expool_initialized == 0)
			expool_init();
		splx(s);

		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	simple_lock_init(&ex->ex_slock);
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;
	return (ex);
}
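
/*
 * A minimal usage sketch of the interfaces defined in this file (the
 * "ioports" name and the address values are hypothetical, and error
 * handling is elided):
 *
 *	struct extent *iomap;
 *	u_long addr;
 *	int error;
 *
 *	iomap = extent_create("ioports", 0x0, 0xffff, M_DEVBUF,
 *	    NULL, 0, EX_WAITOK);
 *
 *	// Claim the specific range 0x3f8-0x3ff (8 ports).
 *	error = extent_alloc_region(iomap, 0x3f8, 8, EX_NOWAIT);
 *
 *	// Or let the allocator pick any 16-byte-aligned 32-byte range.
 *	error = extent_alloc(iomap, 32, 16, 0, EX_NOWAIT, &addr);
 *
 *	error = extent_free(iomap, addr, 32, EX_NOWAIT);
 *	extent_destroy(iomap);
 */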
/*
 * Destroy an extent map.
 * Since we're freeing the data, there can't be any references,
 * so we don't need any locking.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("extent_destroy: NULL extent");
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		free(ex, ex->ex_mtype);
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    int flags, struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    (LIST_FIRST(&ex->ex_regions) != NULL) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if ((LIST_NEXT(after, er_link) != NULL) &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
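
/*
 * To illustrate the coalescing above (hypothetical values): if the
 * region list holds [0x100-0x1ff] and [0x300-0x3ff], then inserting
 * start = 0x200, size = 0x100 appends to the first region and exactly
 * fills the gap, leaving the single region [0x100-0x3ff] and freeing
 * both the caller's descriptor and the descriptor that covered
 * [0x300-0x3ff].  With EX_NOCOALESCE set on the extent, the same
 * insert would instead keep three separate descriptors.
 */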
459 */ 460 LIST_NEXT(after, er_link)->er_start = start; 461 } 462 463 extent_free_region_descriptor(ex, rp); 464 return; 465 } 466 467 /* 468 * We weren't able to coalesce with the next region, but 469 * we don't need to allocate a region descriptor if we 470 * appended ourselves to the previous region. 471 */ 472 if (appended) { 473 extent_free_region_descriptor(ex, rp); 474 return; 475 } 476 477 cant_coalesce: 478 479 /* 480 * Fill in the region descriptor and insert ourselves 481 * into the region list. 482 */ 483 rp->er_start = start; 484 rp->er_end = start + (size - 1); 485 LIST_INSERT_AFTER(after, rp, er_link); 486 } 487 488 /* 489 * Allocate a specific region in an extent map. 490 */ 491 int 492 extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags) 493 { 494 struct extent_region *rp, *last, *myrp; 495 u_long end = start + (size - 1); 496 int error; 497 498 #ifdef DIAGNOSTIC 499 /* Check arguments. */ 500 if (ex == NULL) 501 panic("extent_alloc_region: NULL extent"); 502 if (size < 1) { 503 printf("extent_alloc_region: extent `%s', size 0x%lx\n", 504 ex->ex_name, size); 505 panic("extent_alloc_region: bad size"); 506 } 507 if (end < start) { 508 printf( 509 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n", 510 ex->ex_name, start, size); 511 panic("extent_alloc_region: overflow"); 512 } 513 #endif 514 #ifdef LOCKDEBUG 515 if (flags & EX_WAITSPACE) 516 simple_lock_only_held(NULL, 517 "extent_alloc_region(EX_WAITSPACE)"); 518 #endif 519 520 /* 521 * Make sure the requested region lies within the 522 * extent. 523 * 524 * We don't lock to check the range, because those values 525 * are never modified, and if another thread deletes the 526 * extent, we're screwed anyway. 527 */ 528 if ((start < ex->ex_start) || (end > ex->ex_end)) { 529 #ifdef DIAGNOSTIC 530 printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n", 531 ex->ex_name, ex->ex_start, ex->ex_end); 532 printf("extent_alloc_region: start 0x%lx, end 0x%lx\n", 533 start, end); 534 panic("extent_alloc_region: region lies outside extent"); 535 #else 536 return (EINVAL); 537 #endif 538 } 539 540 /* 541 * Allocate the region descriptor. It will be freed later 542 * if we can coalesce with another region. Don't lock before 543 * here! This could block. 544 */ 545 myrp = extent_alloc_region_descriptor(ex, flags); 546 if (myrp == NULL) { 547 #ifdef DIAGNOSTIC 548 printf( 549 "extent_alloc_region: can't allocate region descriptor\n"); 550 #endif 551 return (ENOMEM); 552 } 553 554 alloc_start: 555 simple_lock(&ex->ex_slock); 556 557 /* 558 * Attempt to place ourselves in the desired area of the 559 * extent. We save ourselves some work by keeping the list sorted. 560 * In other words, if the start of the current region is greater 561 * than the end of our region, we don't have to search any further. 562 */ 563 564 /* 565 * Keep a pointer to the last region we looked at so 566 * that we don't have to traverse the list again when 567 * we insert ourselves. If "last" is NULL when we 568 * finally insert ourselves, we go at the head of the 569 * list. See extent_insert_and_optimize() for details. 570 */ 571 last = NULL; 572 573 LIST_FOREACH(rp, &ex->ex_regions, er_link) { 574 if (rp->er_start > end) { 575 /* 576 * We lie before this region and don't 577 * conflict. 578 */ 579 break; 580 } 581 582 /* 583 * The current region begins before we end. 584 * Check for a conflict. 585 */ 586 if (rp->er_end >= start) { 587 /* 588 * We conflict. If we can (and want to) wait, 589 * do so. 
590 */ 591 if (flags & EX_WAITSPACE) { 592 ex->ex_flags |= EXF_WANTED; 593 error = ltsleep(ex, 594 PNORELOCK | PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), 595 "extnt", 0, &ex->ex_slock); 596 if (error == 0) 597 goto alloc_start; 598 } else { 599 simple_unlock(&ex->ex_slock); 600 error = EAGAIN; 601 } 602 extent_free_region_descriptor(ex, myrp); 603 return error; 604 } 605 /* 606 * We don't conflict, but this region lies before 607 * us. Keep a pointer to this region, and keep 608 * trying. 609 */ 610 last = rp; 611 } 612 613 /* 614 * We don't conflict with any regions. "last" points 615 * to the region we fall after, or is NULL if we belong 616 * at the beginning of the region list. Insert ourselves. 617 */ 618 extent_insert_and_optimize(ex, start, size, flags, last, myrp); 619 simple_unlock(&ex->ex_slock); 620 return (0); 621 } 622 623 /* 624 * Macro to check (x + y) <= z. This check is designed to fail 625 * if an overflow occurs. 626 */ 627 #define LE_OV(x, y, z) ((((x) + (y)) >= (x)) && (((x) + (y)) <= (z))) 628 629 /* 630 * Allocate a region in an extent map subregion. 631 * 632 * If EX_FAST is specified, we return the first fit in the map. 633 * Otherwise, we try to minimize fragmentation by finding the 634 * smallest gap that will hold the request. 635 * 636 * The allocated region is aligned to "alignment", which must be 637 * a power of 2. 638 */ 639 int 640 extent_alloc_subregion1(struct extent *ex, u_long substart, u_long subend, 641 u_long size, u_long alignment, u_long skew, u_long boundary, 642 int flags, u_long *result) 643 { 644 struct extent_region *rp, *myrp, *last, *bestlast; 645 u_long newstart, newend, exend, beststart, bestovh, ovh; 646 u_long dontcross; 647 int error; 648 649 #ifdef DIAGNOSTIC 650 /* 651 * Check arguments. 652 * 653 * We don't lock to check these, because these values 654 * are never modified, and if another thread deletes the 655 * extent, we're screwed anyway. 656 */ 657 if (ex == NULL) 658 panic("extent_alloc_subregion: NULL extent"); 659 if (result == NULL) 660 panic("extent_alloc_subregion: NULL result pointer"); 661 if ((substart < ex->ex_start) || (substart > ex->ex_end) || 662 (subend > ex->ex_end) || (subend < ex->ex_start)) { 663 printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n", 664 ex->ex_name, ex->ex_start, ex->ex_end); 665 printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n", 666 substart, subend); 667 panic("extent_alloc_subregion: bad subregion"); 668 } 669 if ((size < 1) || ((size - 1) > (subend - substart))) { 670 printf("extent_alloc_subregion: extent `%s', size 0x%lx\n", 671 ex->ex_name, size); 672 panic("extent_alloc_subregion: bad size"); 673 } 674 if (alignment == 0) 675 panic("extent_alloc_subregion: bad alignment"); 676 if (boundary && (boundary < size)) { 677 printf( 678 "extent_alloc_subregion: extent `%s', size 0x%lx, " 679 "boundary 0x%lx\n", ex->ex_name, size, boundary); 680 panic("extent_alloc_subregion: bad boundary"); 681 } 682 #endif 683 #ifdef LOCKDEBUG 684 if (flags & EX_WAITSPACE) 685 simple_lock_only_held(NULL, 686 "extent_alloc_subregion1(EX_WAITSPACE)"); 687 #endif 688 689 /* 690 * Allocate the region descriptor. It will be freed later 691 * if we can coalesce with another region. Don't lock before 692 * here! This could block. 
693 */ 694 myrp = extent_alloc_region_descriptor(ex, flags); 695 if (myrp == NULL) { 696 #ifdef DIAGNOSTIC 697 printf( 698 "extent_alloc_subregion: can't allocate region descriptor\n"); 699 #endif 700 return (ENOMEM); 701 } 702 703 alloc_start: 704 simple_lock(&ex->ex_slock); 705 706 /* 707 * Keep a pointer to the last region we looked at so 708 * that we don't have to traverse the list again when 709 * we insert ourselves. If "last" is NULL when we 710 * finally insert ourselves, we go at the head of the 711 * list. See extent_insert_and_optimize() for deatails. 712 */ 713 last = NULL; 714 715 /* 716 * Keep track of size and location of the smallest 717 * chunk we fit in. 718 * 719 * Since the extent can be as large as the numeric range 720 * of the CPU (0 - 0xffffffff for 32-bit systems), the 721 * best overhead value can be the maximum unsigned integer. 722 * Thus, we initialize "bestovh" to 0, since we insert ourselves 723 * into the region list immediately on an exact match (which 724 * is the only case where "bestovh" would be set to 0). 725 */ 726 bestovh = 0; 727 beststart = 0; 728 bestlast = NULL; 729 730 /* 731 * Keep track of end of free region. This is either the end of extent 732 * or the start of a region past the subend. 733 */ 734 exend = ex->ex_end; 735 736 /* 737 * For N allocated regions, we must make (N + 1) 738 * checks for unallocated space. The first chunk we 739 * check is the area from the beginning of the subregion 740 * to the first allocated region after that point. 741 */ 742 newstart = EXTENT_ALIGN(substart, alignment, skew); 743 if (newstart < ex->ex_start) { 744 #ifdef DIAGNOSTIC 745 printf( 746 "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n", 747 ex->ex_name, ex->ex_start, ex->ex_end, alignment); 748 simple_unlock(&ex->ex_slock); 749 panic("extent_alloc_subregion: overflow after alignment"); 750 #else 751 extent_free_region_descriptor(ex, myrp); 752 simple_unlock(&ex->ex_slock); 753 return (EINVAL); 754 #endif 755 } 756 757 /* 758 * Find the first allocated region that begins on or after 759 * the subregion start, advancing the "last" pointer along 760 * the way. 761 */ 762 LIST_FOREACH(rp, &ex->ex_regions, er_link) { 763 if (rp->er_start >= newstart) 764 break; 765 last = rp; 766 } 767 768 /* 769 * Relocate the start of our candidate region to the end of 770 * the last allocated region (if there was one overlapping 771 * our subrange). 772 */ 773 if (last != NULL && last->er_end >= newstart) 774 newstart = EXTENT_ALIGN((last->er_end + 1), alignment, skew); 775 776 for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) { 777 /* 778 * If the region pasts the subend, bail out and see 779 * if we fit against the subend. 780 */ 781 if (rp->er_start > subend) { 782 exend = rp->er_start; 783 break; 784 } 785 786 /* 787 * Check the chunk before "rp". Note that our 788 * comparison is safe from overflow conditions. 789 */ 790 if (LE_OV(newstart, size, rp->er_start)) { 791 /* 792 * Do a boundary check, if necessary. Note 793 * that a region may *begin* on the boundary, 794 * but it must end before the boundary. 795 */ 796 if (boundary) { 797 newend = newstart + (size - 1); 798 799 /* 800 * Calculate the next boundary after the start 801 * of this region. 802 */ 803 dontcross = EXTENT_ALIGN(newstart+1, boundary, 804 (flags & EX_BOUNDZERO) ? 
	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and see
		 * if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = EXTENT_ALIGN(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size,
					    rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * We don't fit exactly, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

 skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = EXTENT_ALIGN((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}
940 */ 941 ovh = exend - newstart - (size - 1); 942 if ((flags & EX_FAST) || (ovh == 0)) 943 goto found; 944 945 /* 946 * Don't exactly fit, but check to see 947 * if we're better than any current choice. 948 */ 949 if ((bestovh == 0) || (ovh < bestovh)) { 950 bestovh = ovh; 951 beststart = newstart; 952 bestlast = last; 953 } 954 } 955 956 fail: 957 /* 958 * One of the following two conditions have 959 * occurred: 960 * 961 * There is no chunk large enough to hold the request. 962 * 963 * If EX_FAST was not specified, there is not an 964 * exact match for the request. 965 * 966 * Note that if we reach this point and EX_FAST is 967 * set, then we know there is no space in the extent for 968 * the request. 969 */ 970 if (((flags & EX_FAST) == 0) && (bestovh != 0)) { 971 /* 972 * We have a match that's "good enough". 973 */ 974 newstart = beststart; 975 last = bestlast; 976 goto found; 977 } 978 979 /* 980 * No space currently available. Wait for it to free up, 981 * if possible. 982 */ 983 if (flags & EX_WAITSPACE) { 984 ex->ex_flags |= EXF_WANTED; 985 error = ltsleep(ex, 986 PNORELOCK | PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), 987 "extnt", 0, &ex->ex_slock); 988 if (error == 0) 989 goto alloc_start; 990 } else { 991 simple_unlock(&ex->ex_slock); 992 error = EAGAIN; 993 } 994 995 extent_free_region_descriptor(ex, myrp); 996 return error; 997 998 found: 999 /* 1000 * Insert ourselves into the region list. 1001 */ 1002 extent_insert_and_optimize(ex, newstart, size, flags, last, myrp); 1003 simple_unlock(&ex->ex_slock); 1004 *result = newstart; 1005 return (0); 1006 } 1007 1008 int 1009 extent_alloc_subregion(struct extent *ex, u_long start, u_long end, u_long size, 1010 u_long alignment, u_long boundary, int flags, u_long *result) 1011 { 1012 1013 return (extent_alloc_subregion1(ex, start, end, size, alignment, 1014 0, boundary, flags, result)); 1015 } 1016 1017 int 1018 extent_alloc(struct extent *ex, u_long size, u_long alignment, u_long boundary, 1019 int flags, u_long *result) 1020 { 1021 1022 return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end, 1023 size, alignment, 0, boundary, 1024 flags, result)); 1025 } 1026 1027 int 1028 extent_alloc1(struct extent *ex, u_long size, u_long alignment, u_long skew, 1029 u_long boundary, int flags, u_long *result) 1030 { 1031 1032 return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end, 1033 size, alignment, skew, boundary, 1034 flags, result)); 1035 } 1036 1037 int 1038 extent_free(struct extent *ex, u_long start, u_long size, int flags) 1039 { 1040 struct extent_region *rp, *nrp = NULL; 1041 u_long end = start + (size - 1); 1042 int coalesce; 1043 1044 #ifdef DIAGNOSTIC 1045 /* 1046 * Check arguments. 1047 * 1048 * We don't lock to check these, because these values 1049 * are never modified, and if another thread deletes the 1050 * extent, we're screwed anyway. 1051 */ 1052 if (ex == NULL) 1053 panic("extent_free: NULL extent"); 1054 if ((start < ex->ex_start) || (end > ex->ex_end)) { 1055 extent_print(ex); 1056 printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n", 1057 ex->ex_name, start, size); 1058 panic("extent_free: extent `%s', region not within extent", 1059 ex->ex_name); 1060 } 1061 /* Check for an overflow. 
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int coalesce;

#ifdef DIAGNOSTIC
	/*
	 * Check arguments.
	 *
	 * We don't lock to check these, because these values
	 * are never modified, and if another thread deletes the
	 * extent, we're screwed anyway.
	 */
	if (ex == NULL)
		panic("extent_free: NULL extent");
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: extent `%s', region not within extent",
		    ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: overflow");
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	simple_lock(&ex->ex_slock);
	coalesce = (ex->ex_flags & EXF_NOCOALESCE) == 0;
	simple_unlock(&ex->ex_slock);

	if (coalesce) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	simple_lock(&ex->ex_slock);

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before the chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (!coalesce)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below. */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
	simple_unlock(&ex->ex_slock);
	extent_print(ex);
	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
	panic("extent_free: region not found");

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	simple_unlock(&ex->ex_slock);
	return (0);
}
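
/*
 * An illustration of case 4 above (hypothetical values): given the
 * allocated region [0x1000-0x1fff], extent_free(ex, 0x1400, 0x400, ...)
 * splits it into [0x1000-0x13ff] and [0x1800-0x1fff], consuming the
 * pre-allocated descriptor "nrp" for the trailing piece.  This is why
 * the descriptor must be allocated up front: the split cannot be
 * allowed to fail (or block) while the extent is locked.
 */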
void
extent_print(struct extent *ex)
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("extent_print: NULL extent");

	simple_lock(&ex->ex_slock);

	printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		printf("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);

	simple_unlock(&ex->ex_slock);
}
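
/*
 * A minimal sketch of a user-land test driver, assuming this file is
 * compiled with -D_EXTENT_TESTING so the stubs near the top are used
 * (the driver file name and all values here are hypothetical):
 *
 *	#include <sys/extent.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct extent *ex;
 *		u_long result;
 *
 *		ex = extent_create("test", 0x0, 0xffff, NULL, NULL, 0, 0);
 *		if (extent_alloc_subregion(ex, 0x100, 0x1ff, 0x40,
 *		    0x10, 0, EX_NOWAIT, &result) == 0)
 *			printf("allocated at 0x%lx\n", result);
 *		extent_print(ex);
 *		extent_destroy(ex);
 *		return 0;
 *	}
 */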