1 /* $NetBSD: subr_blist.c,v 1.9 2006/01/20 14:19:40 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 /* 30 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting 31 * 32 * This module implements a general bitmap allocator/deallocator. The 33 * allocator eats around 2 bits per 'block'. The module does not 34 * try to interpret the meaning of a 'block' other then to return 35 * BLIST_NONE on an allocation failure. 36 * 37 * A radix tree is used to maintain the bitmap. Two radix constants are 38 * involved: One for the bitmaps contained in the leaf nodes (typically 39 * 32), and one for the meta nodes (typically 16). Both meta and leaf 40 * nodes have a hint field. This field gives us a hint as to the largest 41 * free contiguous range of blocks under the node. It may contain a 42 * value that is too high, but will never contain a value that is too 43 * low. When the radix tree is searched, allocation failures in subtrees 44 * update the hint. 45 * 46 * The radix tree also implements two collapsed states for meta nodes: 47 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is 48 * in either of these two states, all information contained underneath 49 * the node is considered stale. These states are used to optimize 50 * allocation and freeing operations. 51 * 52 * The hinting greatly increases code efficiency for allocations while 53 * the general radix structure optimizes both allocations and frees. The 54 * radix tree should be able to operate well no matter how much 55 * fragmentation there is and no matter how large a bitmap is used. 56 * 57 * Unlike the rlist code, the blist code wires all necessary memory at 58 * creation time. Neither allocations nor frees require interaction with 59 * the memory subsystem. In contrast, the rlist code may allocate memory 60 * on an rlist_free() call. The non-blocking features of the blist code 61 * are used to great advantage in the swap code (vm/nswap_pager.c). The 62 * rlist code uses a little less overall memory then the blist code (but 63 * due to swap interleaving not all that much less), but the blist code 64 * scales much, much better. 65 * 66 * LAYOUT: The radix tree is layed out recursively using a 67 * linear array. Each meta node is immediately followed (layed out 68 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This 69 * is a recursive structure but one that can be easily scanned through 70 * a very simple 'skip' calculation. In order to support large radixes, 71 * portions of the tree may reside outside our memory allocation. We 72 * handle this with an early-termination optimization (when bighint is 73 * set to -1) on the scan. The memory allocation is only large enough 74 * to cover the number of blocks requested at creation time even if it 75 * must be encompassed in larger root-node radix. 76 * 77 * NOTE: the allocator cannot currently allocate more then 78 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too 79 * large' if you try. This is an area that could use improvement. The 80 * radix is large enough that this restriction does not effect the swap 81 * system, though. Currently only the allocation code is effected by 82 * this algorithmic unfeature. The freeing code can handle arbitrary 83 * ranges. 84 * 85 * This code can be compiled stand-alone for debugging. 86 */ 87 88 #include <sys/cdefs.h> 89 __KERNEL_RCSID(0, "$NetBSD: subr_blist.c,v 1.9 2006/01/20 14:19:40 yamt Exp $"); 90 #if 0 91 __FBSDID("$FreeBSD: src/sys/kern/subr_blist.c,v 1.17 2004/06/04 04:03:25 alc Exp $"); 92 #endif 93 94 #ifdef _KERNEL 95 96 #include <sys/param.h> 97 #include <sys/systm.h> 98 #include <sys/blist.h> 99 #include <sys/malloc.h> 100 101 #else 102 103 #ifndef BLIST_NO_DEBUG 104 #define BLIST_DEBUG 105 #endif 106 107 #include <sys/types.h> 108 #include <stdio.h> 109 #include <string.h> 110 #include <stdlib.h> 111 #include <stdarg.h> 112 #include <inttypes.h> 113 114 #define malloc(a,b,c) calloc(a, 1) 115 #define free(a,b) free(a) 116 117 #include "../sys/blist.h" 118 119 void panic(const char *ctl, ...); 120 121 #endif 122 123 /* 124 * blmeta and bl_bitmap_t MUST be a power of 2 in size. 125 */ 126 127 typedef struct blmeta { 128 union { 129 blist_blkno_t bmu_avail; /* space available under us */ 130 blist_bitmap_t bmu_bitmap; /* bitmap if we are a leaf */ 131 } u; 132 blist_blkno_t bm_bighint; /* biggest contiguous block hint*/ 133 } blmeta_t; 134 135 struct blist { 136 blist_blkno_t bl_blocks; /* area of coverage */ 137 blist_blkno_t bl_radix; /* coverage radix */ 138 blist_blkno_t bl_skip; /* starting skip */ 139 blist_blkno_t bl_free; /* number of free blocks */ 140 blmeta_t *bl_root; /* root of radix tree */ 141 blist_blkno_t bl_rootblks; /* blks allocated for tree */ 142 }; 143 144 #define BLIST_META_RADIX 16 145 146 /* 147 * static support functions 148 */ 149 150 static blist_blkno_t blst_leaf_alloc(blmeta_t *scan, blist_blkno_t blk, 151 int count); 152 static blist_blkno_t blst_meta_alloc(blmeta_t *scan, blist_blkno_t blk, 153 blist_blkno_t count, blist_blkno_t radix, blist_blkno_t skip); 154 static void blst_leaf_free(blmeta_t *scan, blist_blkno_t relblk, int count); 155 static void blst_meta_free(blmeta_t *scan, blist_blkno_t freeBlk, 156 blist_blkno_t count, blist_blkno_t radix, blist_blkno_t skip, 157 blist_blkno_t blk); 158 static void blst_copy(blmeta_t *scan, blist_blkno_t blk, blist_blkno_t radix, 159 blist_blkno_t skip, blist_t dest, blist_blkno_t count); 160 static int blst_leaf_fill(blmeta_t *scan, blist_blkno_t blk, int count); 161 static blist_blkno_t blst_meta_fill(blmeta_t *scan, blist_blkno_t allocBlk, 162 blist_blkno_t count, blist_blkno_t radix, blist_blkno_t skip, 163 blist_blkno_t blk); 164 static blist_blkno_t blst_radix_init(blmeta_t *scan, blist_blkno_t radix, 165 blist_blkno_t skip, blist_blkno_t count); 166 #ifndef _KERNEL 167 static void blst_radix_print(blmeta_t *scan, blist_blkno_t blk, 168 blist_blkno_t radix, blist_blkno_t skip, int tab); 169 #endif 170 171 #ifdef _KERNEL 172 static MALLOC_DEFINE(M_BLIST, "blist", "Bitmap allocator"); 173 #endif 174 175 /* 176 * blist_create() - create a blist capable of handling up to the specified 177 * number of blocks 178 * 179 * blocks must be greater then 0 180 * 181 * The smallest blist consists of a single leaf node capable of 182 * managing BLIST_BMAP_RADIX blocks. 183 */ 184 185 blist_t 186 blist_create(blist_blkno_t blocks) 187 { 188 blist_t bl; 189 blist_blkno_t radix; 190 blist_blkno_t skip = 0; 191 192 /* 193 * Calculate radix and skip field used for scanning. 194 * 195 * XXX check overflow 196 */ 197 radix = BLIST_BMAP_RADIX; 198 199 while (radix < blocks) { 200 radix *= BLIST_META_RADIX; 201 skip = (skip + 1) * BLIST_META_RADIX; 202 } 203 204 bl = malloc(sizeof(struct blist), M_BLIST, M_WAITOK | M_ZERO); 205 206 bl->bl_blocks = blocks; 207 bl->bl_radix = radix; 208 bl->bl_skip = skip; 209 bl->bl_rootblks = 1 + 210 blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks); 211 bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_BLIST, M_WAITOK); 212 213 #if defined(BLIST_DEBUG) 214 printf( 215 "BLIST representing %" PRIu64 " blocks (%" PRIu64 " MB of swap)" 216 ", requiring %" PRIu64 "K of ram\n", 217 (uint64_t)bl->bl_blocks, 218 (uint64_t)bl->bl_blocks * 4 / 1024, 219 ((uint64_t)bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024 220 ); 221 printf("BLIST raw radix tree contains %" PRIu64 " records\n", 222 (uint64_t)bl->bl_rootblks); 223 #endif 224 blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks); 225 226 return(bl); 227 } 228 229 void 230 blist_destroy(blist_t bl) 231 { 232 free(bl->bl_root, M_BLIST); 233 free(bl, M_BLIST); 234 } 235 236 /* 237 * blist_alloc() - reserve space in the block bitmap. Return the base 238 * of a contiguous region or BLIST_NONE if space could 239 * not be allocated. 240 */ 241 242 blist_blkno_t 243 blist_alloc(blist_t bl, blist_blkno_t count) 244 { 245 blist_blkno_t blk = BLIST_NONE; 246 247 if (bl) { 248 if (bl->bl_radix == BLIST_BMAP_RADIX) 249 blk = blst_leaf_alloc(bl->bl_root, 0, count); 250 else 251 blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip); 252 if (blk != BLIST_NONE) 253 bl->bl_free -= count; 254 } 255 return(blk); 256 } 257 258 /* 259 * blist_free() - free up space in the block bitmap. Return the base 260 * of a contiguous region. Panic if an inconsistancy is 261 * found. 262 */ 263 264 void 265 blist_free(blist_t bl, blist_blkno_t blkno, blist_blkno_t count) 266 { 267 if (bl) { 268 if (bl->bl_radix == BLIST_BMAP_RADIX) 269 blst_leaf_free(bl->bl_root, blkno, count); 270 else 271 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0); 272 bl->bl_free += count; 273 } 274 } 275 276 /* 277 * blist_fill() - mark a region in the block bitmap as off-limits 278 * to the allocator (i.e. allocate it), ignoring any 279 * existing allocations. Return the number of blocks 280 * actually filled that were free before the call. 281 */ 282 283 blist_blkno_t 284 blist_fill(blist_t bl, blist_blkno_t blkno, blist_blkno_t count) 285 { 286 blist_blkno_t filled; 287 288 if (bl) { 289 if (bl->bl_radix == BLIST_BMAP_RADIX) 290 filled = blst_leaf_fill(bl->bl_root, blkno, count); 291 else 292 filled = blst_meta_fill(bl->bl_root, blkno, count, 293 bl->bl_radix, bl->bl_skip, 0); 294 bl->bl_free -= filled; 295 return filled; 296 } else 297 return 0; 298 } 299 300 /* 301 * blist_resize() - resize an existing radix tree to handle the 302 * specified number of blocks. This will reallocate 303 * the tree and transfer the previous bitmap to the new 304 * one. When extending the tree you can specify whether 305 * the new blocks are to left allocated or freed. 306 */ 307 308 void 309 blist_resize(blist_t *pbl, blist_blkno_t count, int freenew) 310 { 311 blist_t newbl = blist_create(count); 312 blist_t save = *pbl; 313 314 *pbl = newbl; 315 if (count > save->bl_blocks) 316 count = save->bl_blocks; 317 blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count); 318 319 /* 320 * If resizing upwards, should we free the new space or not? 321 */ 322 if (freenew && count < newbl->bl_blocks) { 323 blist_free(newbl, count, newbl->bl_blocks - count); 324 } 325 blist_destroy(save); 326 } 327 328 #ifdef BLIST_DEBUG 329 330 /* 331 * blist_print() - dump radix tree 332 */ 333 334 void 335 blist_print(blist_t bl) 336 { 337 printf("BLIST {\n"); 338 blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4); 339 printf("}\n"); 340 } 341 342 #endif 343 344 /************************************************************************ 345 * ALLOCATION SUPPORT FUNCTIONS * 346 ************************************************************************ 347 * 348 * These support functions do all the actual work. They may seem 349 * rather longish, but that's because I've commented them up. The 350 * actual code is straight forward. 351 * 352 */ 353 354 /* 355 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 356 * 357 * This is the core of the allocator and is optimized for the 1 block 358 * and the BLIST_BMAP_RADIX block allocation cases. Other cases are 359 * somewhat slower. The 1 block allocation case is log2 and extremely 360 * quick. 361 */ 362 363 static blist_blkno_t 364 blst_leaf_alloc( 365 blmeta_t *scan, 366 blist_blkno_t blk, 367 int count 368 ) { 369 blist_bitmap_t orig = scan->u.bmu_bitmap; 370 371 if (orig == 0) { 372 /* 373 * Optimize bitmap all-allocated case. Also, count = 1 374 * case assumes at least 1 bit is free in the bitmap, so 375 * we have to take care of this case here. 376 */ 377 scan->bm_bighint = 0; 378 return(BLIST_NONE); 379 } 380 if (count == 1) { 381 /* 382 * Optimized code to allocate one bit out of the bitmap 383 */ 384 blist_bitmap_t mask; 385 int j = BLIST_BMAP_RADIX/2; 386 int r = 0; 387 388 mask = (blist_bitmap_t)-1 >> (BLIST_BMAP_RADIX/2); 389 390 while (j) { 391 if ((orig & mask) == 0) { 392 r += j; 393 orig >>= j; 394 } 395 j >>= 1; 396 mask >>= j; 397 } 398 scan->u.bmu_bitmap &= ~((blist_bitmap_t)1 << r); 399 return(blk + r); 400 } 401 if (count <= BLIST_BMAP_RADIX) { 402 /* 403 * non-optimized code to allocate N bits out of the bitmap. 404 * The more bits, the faster the code runs. It will run 405 * the slowest allocating 2 bits, but since there aren't any 406 * memory ops in the core loop (or shouldn't be, anyway), 407 * you probably won't notice the difference. 408 */ 409 int j; 410 int n = BLIST_BMAP_RADIX - count; 411 blist_bitmap_t mask; 412 413 mask = (blist_bitmap_t)-1 >> n; 414 415 for (j = 0; j <= n; ++j) { 416 if ((orig & mask) == mask) { 417 scan->u.bmu_bitmap &= ~mask; 418 return(blk + j); 419 } 420 mask = (mask << 1); 421 } 422 } 423 /* 424 * We couldn't allocate count in this subtree, update bighint. 425 */ 426 scan->bm_bighint = count - 1; 427 return(BLIST_NONE); 428 } 429 430 /* 431 * blist_meta_alloc() - allocate at a meta in the radix tree. 432 * 433 * Attempt to allocate at a meta node. If we can't, we update 434 * bighint and return a failure. Updating bighint optimize future 435 * calls that hit this node. We have to check for our collapse cases 436 * and we have a few optimizations strewn in as well. 437 */ 438 439 static blist_blkno_t 440 blst_meta_alloc( 441 blmeta_t *scan, 442 blist_blkno_t blk, 443 blist_blkno_t count, 444 blist_blkno_t radix, 445 blist_blkno_t skip 446 ) { 447 blist_blkno_t i; 448 blist_blkno_t next_skip = (skip / BLIST_META_RADIX); 449 450 if (scan->u.bmu_avail == 0) { 451 /* 452 * ALL-ALLOCATED special case 453 */ 454 scan->bm_bighint = count; 455 return(BLIST_NONE); 456 } 457 458 if (scan->u.bmu_avail == radix) { 459 radix /= BLIST_META_RADIX; 460 461 /* 462 * ALL-FREE special case, initialize uninitialize 463 * sublevel. 464 */ 465 for (i = 1; i <= skip; i += next_skip) { 466 if (scan[i].bm_bighint == (blist_blkno_t)-1) 467 break; 468 if (next_skip == 1) { 469 scan[i].u.bmu_bitmap = (blist_bitmap_t)-1; 470 scan[i].bm_bighint = BLIST_BMAP_RADIX; 471 } else { 472 scan[i].bm_bighint = radix; 473 scan[i].u.bmu_avail = radix; 474 } 475 } 476 } else { 477 radix /= BLIST_META_RADIX; 478 } 479 480 for (i = 1; i <= skip; i += next_skip) { 481 if (scan[i].bm_bighint == (blist_blkno_t)-1) { 482 /* 483 * Terminator 484 */ 485 break; 486 } else if (count <= scan[i].bm_bighint) { 487 /* 488 * count fits in object 489 */ 490 blist_blkno_t r; 491 if (next_skip == 1) { 492 r = blst_leaf_alloc(&scan[i], blk, count); 493 } else { 494 r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1); 495 } 496 if (r != BLIST_NONE) { 497 scan->u.bmu_avail -= count; 498 if (scan->bm_bighint > scan->u.bmu_avail) 499 scan->bm_bighint = scan->u.bmu_avail; 500 return(r); 501 } 502 } else if (count > radix) { 503 /* 504 * count does not fit in object even if it were 505 * complete free. 506 */ 507 panic("blist_meta_alloc: allocation too large"); 508 } 509 blk += radix; 510 } 511 512 /* 513 * We couldn't allocate count in this subtree, update bighint. 514 */ 515 if (scan->bm_bighint >= count) 516 scan->bm_bighint = count - 1; 517 return(BLIST_NONE); 518 } 519 520 /* 521 * BLST_LEAF_FREE() - free allocated block from leaf bitmap 522 * 523 */ 524 525 static void 526 blst_leaf_free( 527 blmeta_t *scan, 528 blist_blkno_t blk, 529 int count 530 ) { 531 /* 532 * free some data in this bitmap 533 * 534 * e.g. 535 * 0000111111111110000 536 * \_________/\__/ 537 * v n 538 */ 539 int n = blk & (BLIST_BMAP_RADIX - 1); 540 blist_bitmap_t mask; 541 542 mask = ((blist_bitmap_t)-1 << n) & 543 ((blist_bitmap_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 544 545 if (scan->u.bmu_bitmap & mask) 546 panic("blst_radix_free: freeing free block"); 547 scan->u.bmu_bitmap |= mask; 548 549 /* 550 * We could probably do a better job here. We are required to make 551 * bighint at least as large as the biggest contiguous block of 552 * data. If we just shoehorn it, a little extra overhead will 553 * be incured on the next allocation (but only that one typically). 554 */ 555 scan->bm_bighint = BLIST_BMAP_RADIX; 556 } 557 558 /* 559 * BLST_META_FREE() - free allocated blocks from radix tree meta info 560 * 561 * This support routine frees a range of blocks from the bitmap. 562 * The range must be entirely enclosed by this radix node. If a 563 * meta node, we break the range down recursively to free blocks 564 * in subnodes (which means that this code can free an arbitrary 565 * range whereas the allocation code cannot allocate an arbitrary 566 * range). 567 */ 568 569 static void 570 blst_meta_free( 571 blmeta_t *scan, 572 blist_blkno_t freeBlk, 573 blist_blkno_t count, 574 blist_blkno_t radix, 575 blist_blkno_t skip, 576 blist_blkno_t blk 577 ) { 578 blist_blkno_t i; 579 blist_blkno_t next_skip = (skip / BLIST_META_RADIX); 580 581 #if 0 582 printf("FREE (%" PRIx64 ",%" PRIu64 583 ") FROM (%" PRIx64 ",%" PRIu64 ")\n", 584 (uint64_t)freeBlk, (uint64_t)count, 585 (uint64_t)blk, (uint64_t)radix 586 ); 587 #endif 588 589 if (scan->u.bmu_avail == 0) { 590 /* 591 * ALL-ALLOCATED special case, with possible 592 * shortcut to ALL-FREE special case. 593 */ 594 scan->u.bmu_avail = count; 595 scan->bm_bighint = count; 596 597 if (count != radix) { 598 for (i = 1; i <= skip; i += next_skip) { 599 if (scan[i].bm_bighint == (blist_blkno_t)-1) 600 break; 601 scan[i].bm_bighint = 0; 602 if (next_skip == 1) { 603 scan[i].u.bmu_bitmap = 0; 604 } else { 605 scan[i].u.bmu_avail = 0; 606 } 607 } 608 /* fall through */ 609 } 610 } else { 611 scan->u.bmu_avail += count; 612 /* scan->bm_bighint = radix; */ 613 } 614 615 /* 616 * ALL-FREE special case. 617 */ 618 619 if (scan->u.bmu_avail == radix) 620 return; 621 if (scan->u.bmu_avail > radix) 622 panic("blst_meta_free: freeing already free blocks (%" 623 PRIu64 ") %" PRIu64 "/%" PRIu64, 624 (uint64_t)count, 625 (uint64_t)scan->u.bmu_avail, 626 (uint64_t)radix); 627 628 /* 629 * Break the free down into its components 630 */ 631 632 radix /= BLIST_META_RADIX; 633 634 i = (freeBlk - blk) / radix; 635 blk += i * radix; 636 i = i * next_skip + 1; 637 638 while (i <= skip && blk < freeBlk + count) { 639 blist_blkno_t v; 640 641 v = blk + radix - freeBlk; 642 if (v > count) 643 v = count; 644 645 if (scan->bm_bighint == (blist_blkno_t)-1) 646 panic("blst_meta_free: freeing unexpected range"); 647 648 if (next_skip == 1) { 649 blst_leaf_free(&scan[i], freeBlk, v); 650 } else { 651 blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk); 652 } 653 if (scan->bm_bighint < scan[i].bm_bighint) 654 scan->bm_bighint = scan[i].bm_bighint; 655 count -= v; 656 freeBlk += v; 657 blk += radix; 658 i += next_skip; 659 } 660 } 661 662 /* 663 * BLIST_RADIX_COPY() - copy one radix tree to another 664 * 665 * Locates free space in the source tree and frees it in the destination 666 * tree. The space may not already be free in the destination. 667 */ 668 669 static void blst_copy( 670 blmeta_t *scan, 671 blist_blkno_t blk, 672 blist_blkno_t radix, 673 blist_blkno_t skip, 674 blist_t dest, 675 blist_blkno_t count 676 ) { 677 blist_blkno_t next_skip; 678 blist_blkno_t i; 679 680 /* 681 * Leaf node 682 */ 683 684 if (radix == BLIST_BMAP_RADIX) { 685 blist_bitmap_t v = scan->u.bmu_bitmap; 686 687 if (v == (blist_bitmap_t)-1) { 688 blist_free(dest, blk, count); 689 } else if (v != 0) { 690 int j; 691 692 for (j = 0; j < BLIST_BMAP_RADIX && j < count; ++j) { 693 if (v & (1 << j)) 694 blist_free(dest, blk + j, 1); 695 } 696 } 697 return; 698 } 699 700 /* 701 * Meta node 702 */ 703 704 if (scan->u.bmu_avail == 0) { 705 /* 706 * Source all allocated, leave dest allocated 707 */ 708 return; 709 } 710 if (scan->u.bmu_avail == radix) { 711 /* 712 * Source all free, free entire dest 713 */ 714 if (count < radix) 715 blist_free(dest, blk, count); 716 else 717 blist_free(dest, blk, radix); 718 return; 719 } 720 721 722 radix /= BLIST_META_RADIX; 723 next_skip = (skip / BLIST_META_RADIX); 724 725 for (i = 1; count && i <= skip; i += next_skip) { 726 if (scan[i].bm_bighint == (blist_blkno_t)-1) 727 break; 728 729 if (count >= radix) { 730 blst_copy( 731 &scan[i], 732 blk, 733 radix, 734 next_skip - 1, 735 dest, 736 radix 737 ); 738 count -= radix; 739 } else { 740 if (count) { 741 blst_copy( 742 &scan[i], 743 blk, 744 radix, 745 next_skip - 1, 746 dest, 747 count 748 ); 749 } 750 count = 0; 751 } 752 blk += radix; 753 } 754 } 755 756 /* 757 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap 758 * 759 * This routine allocates all blocks in the specified range 760 * regardless of any existing allocations in that range. Returns 761 * the number of blocks allocated by the call. 762 */ 763 764 static int 765 blst_leaf_fill(blmeta_t *scan, blist_blkno_t blk, int count) 766 { 767 int n = blk & (BLIST_BMAP_RADIX - 1); 768 int nblks; 769 blist_bitmap_t mask, bitmap; 770 771 mask = ((blist_bitmap_t)-1 << n) & 772 ((blist_bitmap_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 773 774 /* Count the number of blocks we're about to allocate */ 775 bitmap = scan->u.bmu_bitmap & mask; 776 for (nblks = 0; bitmap != 0; nblks++) 777 bitmap &= bitmap - 1; 778 779 scan->u.bmu_bitmap &= ~mask; 780 return nblks; 781 } 782 783 /* 784 * BLIST_META_FILL() - allocate specific blocks at a meta node 785 * 786 * This routine allocates the specified range of blocks, 787 * regardless of any existing allocations in the range. The 788 * range must be within the extent of this node. Returns the 789 * number of blocks allocated by the call. 790 */ 791 static blist_blkno_t 792 blst_meta_fill( 793 blmeta_t *scan, 794 blist_blkno_t allocBlk, 795 blist_blkno_t count, 796 blist_blkno_t radix, 797 blist_blkno_t skip, 798 blist_blkno_t blk 799 ) { 800 blist_blkno_t i; 801 blist_blkno_t next_skip = (skip / BLIST_META_RADIX); 802 blist_blkno_t nblks = 0; 803 804 if (count == radix || scan->u.bmu_avail == 0) { 805 /* 806 * ALL-ALLOCATED special case 807 */ 808 nblks = scan->u.bmu_avail; 809 scan->u.bmu_avail = 0; 810 scan->bm_bighint = count; 811 return nblks; 812 } 813 814 if (count > radix) 815 panic("blist_meta_fill: allocation too large"); 816 817 if (scan->u.bmu_avail == radix) { 818 radix /= BLIST_META_RADIX; 819 820 /* 821 * ALL-FREE special case, initialize sublevel 822 */ 823 for (i = 1; i <= skip; i += next_skip) { 824 if (scan[i].bm_bighint == (blist_blkno_t)-1) 825 break; 826 if (next_skip == 1) { 827 scan[i].u.bmu_bitmap = (blist_bitmap_t)-1; 828 scan[i].bm_bighint = BLIST_BMAP_RADIX; 829 } else { 830 scan[i].bm_bighint = radix; 831 scan[i].u.bmu_avail = radix; 832 } 833 } 834 } else { 835 radix /= BLIST_META_RADIX; 836 } 837 838 i = (allocBlk - blk) / radix; 839 blk += i * radix; 840 i = i * next_skip + 1; 841 842 while (i <= skip && blk < allocBlk + count) { 843 blist_blkno_t v; 844 845 v = blk + radix - allocBlk; 846 if (v > count) 847 v = count; 848 849 if (scan->bm_bighint == (blist_blkno_t)-1) 850 panic("blst_meta_fill: filling unexpected range"); 851 852 if (next_skip == 1) { 853 nblks += blst_leaf_fill(&scan[i], allocBlk, v); 854 } else { 855 nblks += blst_meta_fill(&scan[i], allocBlk, v, 856 radix, next_skip - 1, blk); 857 } 858 count -= v; 859 allocBlk += v; 860 blk += radix; 861 i += next_skip; 862 } 863 scan->u.bmu_avail -= nblks; 864 return nblks; 865 } 866 867 /* 868 * BLST_RADIX_INIT() - initialize radix tree 869 * 870 * Initialize our meta structures and bitmaps and calculate the exact 871 * amount of space required to manage 'count' blocks - this space may 872 * be considerably less then the calculated radix due to the large 873 * RADIX values we use. 874 */ 875 876 static blist_blkno_t 877 blst_radix_init(blmeta_t *scan, blist_blkno_t radix, blist_blkno_t skip, 878 blist_blkno_t count) 879 { 880 blist_blkno_t i; 881 blist_blkno_t next_skip; 882 blist_blkno_t memindex = 0; 883 884 /* 885 * Leaf node 886 */ 887 888 if (radix == BLIST_BMAP_RADIX) { 889 if (scan) { 890 scan->bm_bighint = 0; 891 scan->u.bmu_bitmap = 0; 892 } 893 return(memindex); 894 } 895 896 /* 897 * Meta node. If allocating the entire object we can special 898 * case it. However, we need to figure out how much memory 899 * is required to manage 'count' blocks, so we continue on anyway. 900 */ 901 902 if (scan) { 903 scan->bm_bighint = 0; 904 scan->u.bmu_avail = 0; 905 } 906 907 radix /= BLIST_META_RADIX; 908 next_skip = (skip / BLIST_META_RADIX); 909 910 for (i = 1; i <= skip; i += next_skip) { 911 if (count >= radix) { 912 /* 913 * Allocate the entire object 914 */ 915 memindex = i + blst_radix_init( 916 ((scan) ? &scan[i] : NULL), 917 radix, 918 next_skip - 1, 919 radix 920 ); 921 count -= radix; 922 } else if (count > 0) { 923 /* 924 * Allocate a partial object 925 */ 926 memindex = i + blst_radix_init( 927 ((scan) ? &scan[i] : NULL), 928 radix, 929 next_skip - 1, 930 count 931 ); 932 count = 0; 933 } else { 934 /* 935 * Add terminator and break out 936 */ 937 if (scan) 938 scan[i].bm_bighint = (blist_blkno_t)-1; 939 break; 940 } 941 } 942 if (memindex < i) 943 memindex = i; 944 return(memindex); 945 } 946 947 #ifdef BLIST_DEBUG 948 949 static void 950 blst_radix_print(blmeta_t *scan, blist_blkno_t blk, blist_blkno_t radix, 951 blist_blkno_t skip, int tab) 952 { 953 blist_blkno_t i; 954 blist_blkno_t next_skip; 955 int lastState = 0; 956 957 if (radix == BLIST_BMAP_RADIX) { 958 printf( 959 "%*.*s(%0*" PRIx64 ",%" PRIu64 960 "): bitmap %0*" PRIx64 " big=%" PRIu64 "\n", 961 tab, tab, "", 962 sizeof(blk) * 2, 963 (uint64_t)blk, 964 (uint64_t)radix, 965 sizeof(scan->u.bmu_bitmap) * 2, 966 (uint64_t)scan->u.bmu_bitmap, 967 (uint64_t)scan->bm_bighint 968 ); 969 return; 970 } 971 972 if (scan->u.bmu_avail == 0) { 973 printf( 974 "%*.*s(%0*" PRIx64 ",%" PRIu64") ALL ALLOCATED\n", 975 tab, tab, "", 976 sizeof(blk) * 2, 977 (uint64_t)blk, 978 (uint64_t)radix 979 ); 980 return; 981 } 982 if (scan->u.bmu_avail == radix) { 983 printf( 984 "%*.*s(%0*" PRIx64 ",%" PRIu64 ") ALL FREE\n", 985 tab, tab, "", 986 sizeof(blk) * 2, 987 (uint64_t)blk, 988 (uint64_t)radix 989 ); 990 return; 991 } 992 993 printf( 994 "%*.*s(%0*" PRIx64 ",%" PRIu64 "): subtree (%" PRIu64 "/%" 995 PRIu64 ") big=%" PRIu64 " {\n", 996 tab, tab, "", 997 sizeof(blk) * 2, 998 (uint64_t)blk, 999 (uint64_t)radix, 1000 (uint64_t)scan->u.bmu_avail, 1001 (uint64_t)radix, 1002 (uint64_t)scan->bm_bighint 1003 ); 1004 1005 radix /= BLIST_META_RADIX; 1006 next_skip = (skip / BLIST_META_RADIX); 1007 tab += 4; 1008 1009 for (i = 1; i <= skip; i += next_skip) { 1010 if (scan[i].bm_bighint == (blist_blkno_t)-1) { 1011 printf( 1012 "%*.*s(%0*" PRIx64 ",%" PRIu64 "): Terminator\n", 1013 tab, tab, "", 1014 sizeof(blk) * 2, 1015 (uint64_t)blk, 1016 (uint64_t)radix 1017 ); 1018 lastState = 0; 1019 break; 1020 } 1021 blst_radix_print( 1022 &scan[i], 1023 blk, 1024 radix, 1025 next_skip - 1, 1026 tab 1027 ); 1028 blk += radix; 1029 } 1030 tab -= 4; 1031 1032 printf( 1033 "%*.*s}\n", 1034 tab, tab, "" 1035 ); 1036 } 1037 1038 #endif 1039 1040 #ifdef BLIST_DEBUG 1041 1042 int 1043 main(int ac, char **av) 1044 { 1045 blist_blkno_t size = 1024; 1046 int i; 1047 blist_t bl; 1048 1049 for (i = 1; i < ac; ++i) { 1050 const char *ptr = av[i]; 1051 if (*ptr != '-') { 1052 size = strtol(ptr, NULL, 0); 1053 continue; 1054 } 1055 ptr += 2; 1056 fprintf(stderr, "Bad option: %s\n", ptr - 2); 1057 exit(1); 1058 } 1059 bl = blist_create(size); 1060 blist_free(bl, 0, size); 1061 1062 for (;;) { 1063 char buf[1024]; 1064 uint64_t da = 0; 1065 uint64_t count = 0; 1066 1067 printf("%" PRIu64 "/%" PRIu64 "/%" PRIu64 "> ", 1068 (uint64_t)bl->bl_free, 1069 (uint64_t)size, 1070 (uint64_t)bl->bl_radix); 1071 fflush(stdout); 1072 if (fgets(buf, sizeof(buf), stdin) == NULL) 1073 break; 1074 switch(buf[0]) { 1075 case 'r': 1076 if (sscanf(buf + 1, "%" SCNu64, &count) == 1) { 1077 blist_resize(&bl, count, 1); 1078 } else { 1079 printf("?\n"); 1080 } 1081 case 'p': 1082 blist_print(bl); 1083 break; 1084 case 'a': 1085 if (sscanf(buf + 1, "%" SCNu64, &count) == 1) { 1086 blist_blkno_t blk = blist_alloc(bl, count); 1087 printf(" R=%0*" PRIx64 "\n", 1088 sizeof(blk) * 2, 1089 (uint64_t)blk); 1090 } else { 1091 printf("?\n"); 1092 } 1093 break; 1094 case 'f': 1095 if (sscanf(buf + 1, "%" SCNx64 " %" SCNu64, 1096 &da, &count) == 2) { 1097 blist_free(bl, da, count); 1098 } else { 1099 printf("?\n"); 1100 } 1101 break; 1102 case 'l': 1103 if (sscanf(buf + 1, "%" SCNx64 " %" SCNu64, 1104 &da, &count) == 2) { 1105 printf(" n=%" PRIu64 "\n", 1106 (uint64_t)blist_fill(bl, da, count)); 1107 } else { 1108 printf("?\n"); 1109 } 1110 break; 1111 case '?': 1112 case 'h': 1113 puts( 1114 "p -print\n" 1115 "a %d -allocate\n" 1116 "f %x %d -free\n" 1117 "l %x %d -fill\n" 1118 "r %d -resize\n" 1119 "h/? -help" 1120 ); 1121 break; 1122 default: 1123 printf("?\n"); 1124 break; 1125 } 1126 } 1127 return(0); 1128 } 1129 1130 void 1131 panic(const char *ctl, ...) 1132 { 1133 va_list va; 1134 1135 va_start(va, ctl); 1136 vfprintf(stderr, ctl, va); 1137 fprintf(stderr, "\n"); 1138 va_end(va); 1139 exit(1); 1140 } 1141 1142 #endif 1143 1144