/*	$OpenBSD: subr_blist.c,v 1.4 2023/05/30 08:30:01 jsg Exp $	*/
/* DragonFlyBSD:7b80531f545c7d3c51c1660130c71d01f6bccbe0:/sys/kern/subr_blist.c */
/*
 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
 *
 * Copyright (c) 1998,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * This module implements a general bitmap allocator/deallocator.  The
 * allocator eats around 2 bits per 'block'.  The module does not
 * try to interpret the meaning of a 'block' other than to return
 * SWAPBLK_NONE on an allocation failure.
 *
 * A radix tree is used to maintain the bitmap.  Two radix constants are
 * involved: one for the bitmaps contained in the leaf nodes (typically
 * 32), and one for the meta nodes (typically 16).  Both meta and leaf
 * nodes have a hint field.  This field gives us a hint as to the largest
 * free contiguous range of blocks under the node.  It may contain a
 * value that is too high, but will never contain a value that is too
 * low.  When the radix tree is searched, allocation failures in subtrees
 * update the hint.
 *
 * The radix tree also implements two collapsed states for meta nodes:
 * the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 * in either of these two states, all information contained underneath
 * the node is considered stale.  These states are used to optimize
 * allocation and freeing operations.
 *
 * The hinting greatly increases code efficiency for allocations while
 * the general radix structure optimizes both allocations and frees.  The
 * radix tree should be able to operate well no matter how much
 * fragmentation there is and no matter how large a bitmap is used.
 *
 * Unlike the rlist code, the blist code wires all necessary memory at
 * creation time.  Neither allocations nor frees require interaction with
 * the memory subsystem.  In contrast, the rlist code may allocate memory
 * on a blist_free() call.  The non-blocking features of the blist code
 * are used to great advantage in the swap code (uvm/uvm_swap.c).  The
 * rlist code uses a little less overall memory than the blist code (but
 * due to swap interleaving not all that much less), but the blist code
 * scales much, much better.
 *
 * LAYOUT: The radix tree is laid out recursively using a
 * linear array.  Each meta node is immediately followed (laid out
 * sequentially in memory) by BLIST_META_RADIX lower-level nodes.  This
 * is a recursive structure but one that can be easily scanned through
 * a very simple 'skip' calculation.  In order to support large radixes,
 * portions of the tree may reside outside our memory allocation.  We
 * handle this with an early-termination optimization (when bighint is
 * set to -1) on the scan.  The memory allocation is only large enough
 * to cover the number of blocks requested at creation time even if it
 * must be encompassed in a larger root-node radix.
 *
 * NOTE: The allocator cannot currently allocate more than
 * BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 * large' if you try.  This is an area that could use improvement.  The
 * radix is large enough that this restriction does not affect the swap
 * system, though.  Currently only the allocation code is affected by
 * this algorithmic unfeature.  The freeing code can handle arbitrary
 * ranges.
 *
 * NOTE: The radix may exceed BLIST_BMAP_RADIX bits in order to support
 * up to 2^(BLIST_BMAP_RADIX-1) blocks.  The first division will
 * drop the radix down and fit it within a signed BLIST_BMAP_RADIX
 * bit integer.
 *
 * This code can be compiled stand-alone for debugging.
 */
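/*
 * As an illustration of the layout (using a scaled-down
 * BLIST_META_RADIX of 2 and BLIST_BMAP_RADIX of 8, purely for the sake
 * of example), a tree covering 32 blocks occupies a linear array like
 * this:
 *
 *	index 0: meta node, radix 32, skip 6	(root)
 *	index 1: meta node, radix 16, skip 2	(blocks  0-15)
 *	index 2:   leaf bitmap			(blocks  0-7)
 *	index 3:   leaf bitmap			(blocks  8-15)
 *	index 4: meta node, radix 16, skip 2	(blocks 16-31)
 *	index 5:   leaf bitmap			(blocks 16-23)
 *	index 6:   leaf bitmap			(blocks 24-31)
 *
 * A node at index i with skip s has its j'th child at index
 * i + 1 + j*(s/BLIST_META_RADIX); the scan loops below express this
 * as 'for (i = 1; i <= skip; i += next_skip)'.
 */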
#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/blist.h>
#include <sys/malloc.h>

#else

#ifndef BLIST_NO_DEBUG
#define BLIST_DEBUG
#endif

#include <sys/types.h>
#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>

#define malloc(s,t,f)		calloc(1, s)
#define mallocarray(n,s,t,f)	reallocarray(NULL, n, s)
#define free(p,t,s)		free(p)
#define KASSERT(exp)		assert(exp)
#define KDASSERT(exp)		assert(exp)

#include "../sys/blist.h"

#define panic(...)	do { errx(1, __VA_ARGS__); } while (0)

#endif
/*
 * static support functions
 */

static swblk_t blst_leaf_alloc(blmeta_t *scan, swblk_t blkat,
				swblk_t blk, swblk_t count);
static swblk_t blst_meta_alloc(blmeta_t *scan, swblk_t blkat,
				swblk_t blk, swblk_t count,
				swblk_t radix, swblk_t skip);
static void blst_leaf_free(blmeta_t *scan, swblk_t relblk, swblk_t count);
static void blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count,
				swblk_t radix, swblk_t skip,
				swblk_t blk);
static swblk_t blst_leaf_fill(blmeta_t *scan, swblk_t blk, swblk_t count);
static swblk_t blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count,
				swblk_t radix, swblk_t skip,
				swblk_t blk);
static void blst_copy(blmeta_t *scan, swblk_t blk, swblk_t radix,
				swblk_t skip, blist_t dest, swblk_t count);
static swblk_t blst_radix_init(blmeta_t *scan, swblk_t radix,
				swblk_t skip, swblk_t count);
static int blst_radix_gapfind(blmeta_t *scan, swblk_t blk, swblk_t radix,
				swblk_t skip, int state, swblk_t *maxbp,
				swblk_t *maxep, swblk_t *bp, swblk_t *ep);

#if defined(BLIST_DEBUG) || defined(DDB)
static void	blst_radix_print(blmeta_t *scan, swblk_t blk,
				swblk_t radix, swblk_t skip, int tab);
#endif

/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks must be greater than 0
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 *
 *	The pages are addressable in range [0, nblocks[
 */

blist_t
blist_create(swblk_t blocks)
{
	blist_t bl;
	swblk_t radix;
	swblk_t skip = 0;

	KASSERT(blocks > 0);

	/*
	 * Calculate radix and skip field used for scanning.
	 *
	 * Radix can exceed BLIST_BMAP_RADIX bits even if swblk_t is limited
	 * to BLIST_BMAP_RADIX bits.
	 *
	 * XXX check overflow
	 */
	radix = BLIST_BMAP_RADIX;

	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
		skip = (skip + 1) * BLIST_META_RADIX;
		KASSERT(skip > 0);
	}

	bl = malloc(sizeof(struct blist), M_VMSWAP, M_WAITOK | M_ZERO);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_skip = skip;
	bl->bl_rootblks = 1 +
	    blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
	bl->bl_root = mallocarray(bl->bl_rootblks, sizeof(blmeta_t),
	    M_VMSWAP, M_WAITOK);

#if defined(BLIST_DEBUG)
	printf(
		"BLIST representing %lu blocks (%lu MB of swap)"
		", requiring %6.2fM of ram\n",
		bl->bl_blocks,
		bl->bl_blocks * 4 / 1024,
		(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / (1024.0 * 1024.0)
	);
	printf("BLIST raw radix tree: %lu records, top-radix %lu\n",
	    bl->bl_rootblks, bl->bl_radix);
#endif
	blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);

	return(bl);
}
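/*
 * A worked example of the radix/skip sizing loop in blist_create()
 * above, assuming the typical constants mentioned in the header
 * comment (BLIST_BMAP_RADIX = 32, BLIST_META_RADIX = 16) and
 * blocks = 1000:
 *
 *	start:	radix = 32,   skip = 0
 *	pass 1:	radix = 512,  skip = 16		(32 < 1000)
 *	pass 2:	radix = 8192, skip = 272	(512 < 1000)
 *
 * 8192 >= 1000, so the loop stops: the root covers 8192 blocks and a
 * fully populated tree would occupy 1 + 272 array slots, although the
 * NULL dry-run of blst_radix_init() only sizes the array for the slots
 * actually reachable for 1000 blocks.
 */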
void
blist_destroy(blist_t bl)
{
	KASSERT(bl != NULL);

	free(bl->bl_root, M_VMSWAP, sizeof(blmeta_t) * bl->bl_rootblks);
	free(bl, M_VMSWAP, sizeof(struct blist));
}

/*
 * blist_alloc() - reserve space in the block bitmap.  Return the base
 *		   of a contiguous region or SWAPBLK_NONE if space could
 *		   not be allocated.
 */

swblk_t
blist_alloc(blist_t bl, swblk_t count)
{
	swblk_t blk = SWAPBLK_NONE;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, 0, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, 0, 0, count,
			    bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE) {
			bl->bl_free -= count;

			KDASSERT(blk < bl->bl_blocks);
			KDASSERT(bl->bl_free <= bl->bl_blocks);
		}
	}
	return(blk);
}

swblk_t
blist_allocat(blist_t bl, swblk_t count, swblk_t blkat)
{
	swblk_t blk = SWAPBLK_NONE;

	if (bl) {
		KDASSERT(blkat < bl->bl_blocks);
		KDASSERT(blkat + count <= bl->bl_blocks);

		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, blkat, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, blkat, 0, count,
			    bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE) {
			bl->bl_free -= count;

			KDASSERT(blk < bl->bl_blocks);
			KDASSERT(bl->bl_free <= bl->bl_blocks);
		}
	}
	return(blk);
}

/*
 * blist_free() - free up space in the block bitmap.  Panic if an
 *		  inconsistency is found.
 */

void
blist_free(blist_t bl, swblk_t blkno, swblk_t count)
{
	if (bl) {
		KDASSERT(blkno < bl->bl_blocks);
		KDASSERT(blkno + count <= bl->bl_blocks);

		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blst_leaf_free(bl->bl_root, blkno, count);
		else
			blst_meta_free(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free += count;

		KDASSERT(bl->bl_free <= bl->bl_blocks);
	}
}

/*
 * blist_fill() - mark a region in the block bitmap as off-limits
 *		  to the allocator (i.e. allocate it), ignoring any
 *		  existing allocations.  Return the number of blocks
 *		  actually filled that were free before the call.
 */

swblk_t
blist_fill(blist_t bl, swblk_t blkno, swblk_t count)
{
	swblk_t filled;

	if (bl) {
		KDASSERT(blkno < bl->bl_blocks);
		KDASSERT(blkno + count <= bl->bl_blocks);

		if (bl->bl_radix == BLIST_BMAP_RADIX) {
			filled = blst_leaf_fill(bl->bl_root, blkno, count);
		} else {
			filled = blst_meta_fill(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
		}
		bl->bl_free -= filled;
		KDASSERT(bl->bl_free <= bl->bl_blocks);
		return (filled);
	} else {
		return 0;
	}
}

/*
 * blist_resize() - resize an existing radix tree to handle the
 *		    specified number of blocks.  This will reallocate
 *		    the tree and transfer the previous bitmap to the new
 *		    one.  When extending the tree you can specify whether
 *		    the new blocks are to be left allocated or freed.
 */

void
blist_resize(blist_t *pbl, swblk_t count, int freenew)
{
	blist_t newbl = blist_create(count);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip,
	    newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}
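/*
 * Typical use of the public interface above, as a minimal sketch
 * (error handling omitted; the stand-alone main() at the bottom of
 * this file follows the same pattern):
 *
 *	blist_t bl = blist_create(1024);
 *	blist_free(bl, 0, 1024);	   (new blists start all-allocated)
 *	swblk_t blk = blist_alloc(bl, 16);
 *	if (blk != SWAPBLK_NONE)
 *		blist_free(bl, blk, 16);
 *	blist_destroy(bl);
 *
 * Note that blst_radix_init() zeroes the leaf bitmaps, so a freshly
 * created blist is entirely allocated; the caller must blist_free()
 * the usable range first.
 */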
#define GAPFIND_FIRSTFREE 0
#define GAPFIND_FIRSTUSED 1

/*
 * blist_gapfind() - return the largest gap (free pages) in blist.
 *		     The blist isn't modified.  The returned range
 *		     is [maxbp, maxep[ .  The size of the gap is
 *		     maxep - maxbp.  If not found, the size is 0.
 */

void
blist_gapfind(blist_t bl, swblk_t *maxbp, swblk_t *maxep)
{
	int state;
	swblk_t b, e;

	/* initialize gaps (max and current) */
	*maxbp = *maxep = 0;
	b = e = 0;

	/* search the largest gap from block 0 */
	state = blst_radix_gapfind(bl->bl_root, 0, bl->bl_radix, bl->bl_skip,
	    GAPFIND_FIRSTFREE, maxbp, maxep, &b, &e);

	if (state == GAPFIND_FIRSTUSED) {
		e = bl->bl_blocks;
		if (*maxep - *maxbp < e - b) {
			*maxbp = b;
			*maxep = e;
		}
	}

	KDASSERT(*maxbp <= *maxep);
	KDASSERT(*maxbp < bl->bl_blocks);
	KDASSERT(*maxep <= bl->bl_blocks);
}

/*
 * blst_radix_gapfind - search the largest gap in one pass
 *
 * - search first free block, from X -> set B
 * - search first used block, from B -> set E
 * - if the size (E - B) is larger than max, update it
 * - loop (with X=E) until end of blist
 * - max is the largest free gap
 */
static int
blst_radix_gapfind(blmeta_t *scan, swblk_t blk, swblk_t radix, swblk_t skip,
    int state, swblk_t *maxbp, swblk_t *maxep, swblk_t *bp, swblk_t *ep)
{
	swblk_t i;
	swblk_t next_skip;

	if (radix == BLIST_BMAP_RADIX) {
		/* leaf node: we consider only completely free bitmaps as free */
		if (state == GAPFIND_FIRSTFREE) {
			if (scan->u.bmu_bitmap == (u_swblk_t)-1) {
				/* node is fully free */
				*bp = blk;
				return GAPFIND_FIRSTUSED;
			}

			/* it isn't fully free, not found, keep state */
			return state;

		} else if (state == GAPFIND_FIRSTUSED) {
			if (scan->u.bmu_bitmap == (u_swblk_t)-1) {
				/* it is free, not found, keep state */
				return state;
			}

			/* it is (at least partially) used */
			*ep = blk;
			if (*maxep - *maxbp < *ep - *bp) {
				*maxbp = *bp;
				*maxep = *ep;
			}
			return GAPFIND_FIRSTFREE;
		}
	}

	if (scan->u.bmu_avail == 0) {
		/* ALL-ALLOCATED */
		if (state == GAPFIND_FIRSTFREE) {
			/* searching free block, not found, keep state */
			return state;

		} else if (state == GAPFIND_FIRSTUSED) {
			/* searching used block, found */
			*ep = blk;
			if (*maxep - *maxbp < *ep - *bp) {
				*maxbp = *bp;
				*maxep = *ep;
			}
			return GAPFIND_FIRSTFREE;
		}
	}

	if (scan->u.bmu_avail == radix) {
		/* ALL-FREE */
		if (state == GAPFIND_FIRSTFREE) {
			/* searching free block, found */
			*bp = blk;
			return GAPFIND_FIRSTUSED;

		} else if (state == GAPFIND_FIRSTUSED) {
			/* searching used block, not found, keep state */
			return state;
		}
	}

	radix /= BLIST_META_RADIX;
	next_skip = (skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1)
			/* Terminator */
			break;

		state = blst_radix_gapfind(&scan[i], blk, radix, next_skip - 1,
		    state, maxbp, maxep, bp, ep);

		blk += radix;
	}

	return state;
}
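/*
 * Illustrative trace of the state machine above (leaf granularity;
 * only a completely free leaf counts as free): take four leaves L0..L3
 * of BLIST_BMAP_RADIX blocks each, with L1 partially used and the rest
 * fully free.
 *
 *	L0 (free):	FIRSTFREE  -> B = 0, switch to FIRSTUSED
 *	L1 (used):	FIRSTUSED  -> E = base of L1, record gap [B,E[,
 *				      switch to FIRSTFREE
 *	L2 (free):	FIRSTFREE  -> B = base of L2, switch to FIRSTUSED
 *	L3 (free):	FIRSTUSED  -> still free, keep state
 *
 * The scan ends in FIRSTUSED, so blist_gapfind() closes the trailing
 * gap with E = bl_blocks and keeps whichever of the two gaps is larger.
 */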
#if defined(BLIST_DEBUG) || defined(DDB)

/*
 * blist_print()    - dump radix tree
 */

void
blist_print(blist_t bl)
{
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}

#endif

/************************************************************************
 *			ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 *
 */

/*
 * blst_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the 1 block
 *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
 *	somewhat slower.  The 1 block allocation case is log2 and extremely
 *	quick.
 */

static swblk_t
blst_leaf_alloc(blmeta_t *scan, swblk_t blkat __unused, swblk_t blk,
		swblk_t count)
{
	u_swblk_t orig = scan->u.bmu_bitmap;

	if (orig == 0) {
		/*
		 * Optimize bitmap all-allocated case.  Also, count = 1
		 * case assumes at least 1 bit is free in the bitmap, so
		 * we have to take care of this case here.
		 */
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}
	if (count == 1) {
		/*
		 * Optimized code to allocate one bit out of the bitmap
		 */
		u_swblk_t mask;
		int j = BLIST_BMAP_RADIX/2;
		int r = 0;

		mask = (u_swblk_t)-1 >> (BLIST_BMAP_RADIX/2);

		while (j) {
			if ((orig & mask) == 0) {
				r += j;
				orig >>= j;
			}
			j >>= 1;
			mask >>= j;
		}
		scan->u.bmu_bitmap &= ~((u_swblk_t)1 << r);
		return(blk + r);
	}
	if (count <= BLIST_BMAP_RADIX) {
		/*
		 * Non-optimized code to allocate N bits out of the bitmap.
		 * The more bits, the faster the code runs.  It will run
		 * the slowest allocating 2 bits, but since there aren't any
		 * memory ops in the core loop (or shouldn't be, anyway),
		 * you probably won't notice the difference.
		 */
		int j;
		int n = (int)(BLIST_BMAP_RADIX - count);
		u_swblk_t mask;

		mask = (u_swblk_t)-1 >> n;

		for (j = 0; j <= n; ++j) {
			if ((orig & mask) == mask) {
				scan->u.bmu_bitmap &= ~mask;
				return(blk + j);
			}
			mask = (mask << 1);
		}
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	scan->bm_bighint = count - 1;

	return(SWAPBLK_NONE);
}
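/*
 * A worked trace of the count == 1 binary search above, shrunk to an
 * illustrative 8-bit bitmap (the real one is BLIST_BMAP_RADIX bits).
 * Say orig = 0b00110000, i.e. only bits 4 and 5 are free:
 *
 *	j = 4, mask = 0x0f:	orig & mask == 0, so the free bit is in
 *				the upper half: r = 4, orig >>= 4
 *	j = 2, mask = 0x03:	orig & mask != 0, free bit in lower half
 *	j = 1, mask = 0x01:	orig & mask != 0, bit found
 *
 * The loop ends with r = 4, and bit 4 is cleared (allocated).  Each
 * iteration halves the search window, hence the 'log2' claim in the
 * comment above.
 */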
/*
 * blst_meta_alloc() - allocate at a meta node in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */
static swblk_t
blst_meta_alloc(blmeta_t *scan, swblk_t blkat,
		swblk_t blk, swblk_t count,
		swblk_t radix, swblk_t skip)
{
	int hintok = (blk >= blkat);
	swblk_t next_skip = ((swblk_t)skip / BLIST_META_RADIX);
	swblk_t i;

#ifndef _KERNEL
	printf("blist_meta_alloc blkat %lu blk %lu count %lu radix %lu\n",
	    blkat, blk, count, radix);
#endif

	/*
	 * ALL-ALLOCATED special case
	 */
	if (scan->u.bmu_avail == 0) {
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}

	/*
	 * ALL-FREE special case, initialize uninitialized
	 * sublevel.
	 *
	 * NOTE: radix may exceed 32 bits until first division.
	 */
	if (scan->u.bmu_avail == radix) {
		scan->bm_bighint = radix;

		radix /= BLIST_META_RADIX;
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (swblk_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_swblk_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = (swblk_t)radix;
				scan[i].u.bmu_avail = (swblk_t)radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1) {
			/*
			 * Terminator
			 *
			 * note: check it first, as swblk_t may be unsigned.
			 * otherwise, the second if() might match and the
			 * Terminator will be ignored.
			 */
			break;
		}

		if (count <= scan[i].bm_bighint &&
		    blk + (swblk_t)radix > blkat) {
			/*
			 * count fits in object
			 */
			swblk_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blkat,
				    blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blkat,
				    blk, count,
				    radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
			/* bighint was updated by recursion */
		} else if (count > (swblk_t)radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("%s: allocation too large %lu/%lu",
			    __func__, count, radix);
		}
		blk += (swblk_t)radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (hintok && scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}

/*
 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
 */
static void
blst_leaf_free(blmeta_t *scan, swblk_t blk, swblk_t count)
{
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	    \_________/\__/
	 *	         v       n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_swblk_t mask;

	mask = ((u_swblk_t)-1 << n) &
	    ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("%s: freeing free block", __func__);
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}
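/*
 * The mask construction above, traced on an illustrative 8-bit bitmap:
 * freeing count = 3 blocks at relative offset n = 4 gives
 *
 *	(-1 << 4)		= 11110000
 *	(-1 >> (8 - 3 - 4))	= 01111111
 *	intersection		= 01110000	(bits 4-6)
 *
 * i.e. exactly the three bits being freed.  The panic above fires if
 * any of those bits is already 1 (already free).
 */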
/*
 * BLST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */

static void
blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count,
	       swblk_t radix, swblk_t skip, swblk_t blk)
{
	swblk_t i;
	swblk_t next_skip = ((swblk_t)skip / BLIST_META_RADIX);

#if 0
	printf("FREE (%04lx,%lu) FROM (%04lx,%lu)\n",
	    freeBlk, count,
	    blk, radix
	);
#endif

	/*
	 * ALL-ALLOCATED special case, initialize for recursion.
	 *
	 * We will short-cut the ALL-ALLOCATED -> ALL-FREE case.
	 */
	if (scan->u.bmu_avail == 0) {
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (swblk_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 *
	 * Set bighint for higher levels to snoop.
	 */
	if (scan->u.bmu_avail == radix) {
		scan->bm_bighint = radix;
		return;
	}

	/*
	 * Break the free down into its components
	 */
	if (scan->u.bmu_avail > radix) {
		panic("%s: freeing already "
		    "free blocks (%lu) %lu/%lu",
		    __func__, count, (long)scan->u.bmu_avail, radix);
	}

	radix /= BLIST_META_RADIX;

	i = (freeBlk - blk) / (swblk_t)radix;
	blk += i * (swblk_t)radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		swblk_t v;

		v = blk + (swblk_t)radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (swblk_t)-1)
			panic("%s: freeing unexpected range", __func__);

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v,
			    radix, next_skip - 1, blk);
		}

		/*
		 * After having dealt with the becomes-all-free case any
		 * partial free will not be able to bring us to the
		 * becomes-all-free state.
		 *
		 * We can raise bighint to at least the sub-segment's
		 * bighint.
		 */
		if (scan->bm_bighint < scan[i].bm_bighint) {
			scan->bm_bighint = scan[i].bm_bighint;
		}
		count -= v;
		freeBlk += v;
		blk += (swblk_t)radix;
		i += next_skip;
	}
}

/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	Allocates all blocks in the specified range regardless of
 *	any existing allocations in that range.  Returns the number
 *	of blocks allocated by the call.
 */
static swblk_t
blst_leaf_fill(blmeta_t *scan, swblk_t blk, swblk_t count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	swblk_t nblks;
	u_swblk_t mask, bitmap;

	mask = ((u_swblk_t)-1 << n) &
	    ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* Count the number of blocks we're about to allocate */
	bitmap = scan->u.bmu_bitmap & mask;
	for (nblks = 0; bitmap != 0; nblks++)
		bitmap &= bitmap - 1;

	scan->u.bmu_bitmap &= ~mask;
	return (nblks);
}
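/*
 * The counting loop above uses the classic clear-lowest-set-bit trick:
 * bitmap &= bitmap - 1 removes exactly one set bit per iteration, so
 * nblks ends up as the population count of (bitmap & mask), i.e. the
 * number of blocks in the range that were still free.  For example,
 * with bitmap & mask = 0b0110 the loop runs twice:
 * 0b0110 -> 0b0100 -> 0b0000, giving nblks = 2.
 */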
/*
 * BLST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	Allocates the specified range of blocks, regardless of
 *	any existing allocations in the range.  The range must
 *	be within the extent of this node.  Returns the number
 *	of blocks allocated by the call.
 */
static swblk_t
blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count,
	       swblk_t radix, swblk_t skip, swblk_t blk)
{
	swblk_t i;
	swblk_t next_skip = ((swblk_t)skip / BLIST_META_RADIX);
	swblk_t nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return (nblks);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize sublevel
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (swblk_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_swblk_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = (swblk_t)radix;
				scan[i].u.bmu_avail = (swblk_t)radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > (swblk_t)radix)
		panic("%s: allocation too large", __func__);

	i = (fillBlk - blk) / (swblk_t)radix;
	blk += i * (swblk_t)radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < fillBlk + count) {
		swblk_t v;

		v = blk + (swblk_t)radix - fillBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (swblk_t)-1)
			panic("%s: filling unexpected range", __func__);

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], fillBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], fillBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		fillBlk += v;
		blk += (swblk_t)radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return (nblks);
}
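/*
 * Note that the fill return value only counts blocks that changed
 * state.  For example, filling an 8-block range of which 3 blocks are
 * already allocated returns 5, and blist_fill() deducts exactly those
 * 5 from bl_free, keeping the free-space accounting consistent.
 */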
/*
 * BLST_COPY() - copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the destination
 *	tree.  The space may not already be free in the destination.
 */

static void
blst_copy(blmeta_t *scan, swblk_t blk, swblk_t radix,
	  swblk_t skip, blist_t dest, swblk_t count)
{
	swblk_t next_skip;
	swblk_t i;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		u_swblk_t v = scan->u.bmu_bitmap;

		if (v == (u_swblk_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & ((u_swblk_t)1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}

	/*
	 * Meta node
	 */

	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, (swblk_t)radix);
		return;
	}


	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);

	for (i = 1; count && i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1)
			break;

		if (count >= (swblk_t)radix) {
			blst_copy(
			    &scan[i],
			    blk,
			    radix,
			    next_skip - 1,
			    dest,
			    (swblk_t)radix
			);
			count -= (swblk_t)radix;
		} else {
			if (count) {
				blst_copy(
				    &scan[i],
				    blk,
				    radix,
				    next_skip - 1,
				    dest,
				    count
				);
			}
			count = 0;
		}
		blk += (swblk_t)radix;
	}
}

/*
 * BLST_RADIX_INIT() - initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */

static swblk_t
blst_radix_init(blmeta_t *scan, swblk_t radix, swblk_t skip, swblk_t count)
{
	swblk_t i;
	swblk_t next_skip;
	swblk_t memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= (swblk_t)radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    (swblk_t)radix
			);
			count -= (swblk_t)radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (swblk_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return(memindex);
}
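/*
 * A worked sizing example, scaled down to BLIST_BMAP_RADIX = 8 and
 * BLIST_META_RADIX = 4 for readability: for blocks = 40, blist_create()
 * picks radix = 128 and skip = 20, so a fully populated tree would need
 * 1 + 20 = 21 blmeta_t entries.  The NULL dry-run of blst_radix_init()
 * only descends into reachable subtrees: the first child (32 blocks) is
 * fully populated through index 5, the second child (the remaining 8
 * blocks) uses one leaf plus a terminator through index 8, and the
 * third child slot (index 11) holds the final terminator.  It returns
 * 11, so only 12 entries are allocated instead of 21.
 */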
#if defined(BLIST_DEBUG) || defined(DDB)

static void
blst_radix_print(blmeta_t *scan, swblk_t blk, swblk_t radix, swblk_t skip,
    int tab)
{
	swblk_t i;
	swblk_t next_skip;

	if (radix == BLIST_BMAP_RADIX) {
		printf(
		    "%*.*s(%04lx,%lu): bitmap %0*llx big=%lu\n",
		    tab, tab, "",
		    blk, radix,
		    (int)(1 + (BLIST_BMAP_RADIX - 1) / 4),
		    scan->u.bmu_bitmap,
		    scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		printf(
		    "%*.*s(%04lx,%lu) ALL ALLOCATED\n",
		    tab, tab, "",
		    blk,
		    radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		printf(
		    "%*.*s(%04lx,%lu) ALL FREE\n",
		    tab, tab, "",
		    blk,
		    radix
		);
		return;
	}

	printf(
	    "%*.*s(%04lx,%lu): subtree (%lu/%lu) big=%lu {\n",
	    tab, tab, "",
	    blk, radix,
	    scan->u.bmu_avail,
	    radix,
	    scan->bm_bighint
	);

	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);
	tab += 4;

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1) {
			printf(
			    "%*.*s(%04lx,%lu): Terminator\n",
			    tab, tab, "",
			    blk, radix
			);
			break;
		}
		blst_radix_print(
		    &scan[i],
		    blk,
		    radix,
		    next_skip - 1,
		    tab
		);
		blk += (swblk_t)radix;
	}
	tab -= 4;

	printf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif
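/*
 * Stand-alone usage sketch: compiling this file outside the kernel
 * (no _KERNEL) automatically defines BLIST_DEBUG and enables the
 * interactive test driver below.  Assuming the relative include path
 * "../sys/blist.h" resolves, i.e. building from within the source
 * tree (e.g. sys/kern), something like
 *
 *	$ cc -o blist subr_blist.c
 *	$ ./blist 1024
 *
 * gives a prompt showing free/size/radix, with single-letter commands
 * ('h' for help) to exercise the allocator.
 */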
#if !defined(_KERNEL) && defined(BLIST_DEBUG)

int
main(int ac, char **av)
{
	swblk_t size = 1024;
	swblk_t i;
	blist_t bl;

	for (i = 1; i < (swblk_t)ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		swblk_t da = 0;
		swblk_t count = 0;
		swblk_t blkat;


		printf("%lu/%lu/%lu> ",
		    bl->bl_free, size, bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case '#':
			continue;
		case 'r':
			if (sscanf(buf + 1, "%li", &count) == 1) {
				blist_resize(&bl, count, 1);
				size = count;
			} else {
				printf("?\n");
			}
			/* FALLTHROUGH */
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%li %li", &count, &blkat) == 1) {
				printf("count %lu\n", count);
				swblk_t blk = blist_alloc(bl, count);
				if (blk == SWAPBLK_NONE)
					printf("    R=SWAPBLK_NONE\n");
				else
					printf("    R=%04lx\n", blk);
			} else if (sscanf(buf + 1, "%li %li", &count,
			    &blkat) == 2) {
				swblk_t blk = blist_allocat(bl, count, blkat);
				if (blk == SWAPBLK_NONE)
					printf("    R=SWAPBLK_NONE\n");
				else
					printf("    R=%04lx\n", blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%li %li", &da, &count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'g': {
			swblk_t b, e;
			blist_gapfind(bl, &b, &e);
			printf("gapfind: begin=%04lx end=%04lx size=%lu\n",
			    b, e, e - b);
			break;
		}
		case 'l':
			if (sscanf(buf + 1, "%li %li", &da, &count) == 2) {
				printf("    n=%lu\n",
				    blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %li      -allocate\n"
			    "f %li %li  -free\n"
			    "l %li %li  -fill\n"
			    "g          -gapfind\n"
			    "r %li      -resize\n"
			    "h/?        -help\n"
			    "   hex may be specified with 0x prefix\n"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return(0);
}

#endif