/*	$NetBSD: subr_vmem.c,v 1.24 2006/11/18 07:51:54 yamt Exp $	*/

/*-
 * Copyright (c)2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 *
 * todo:
 * -	decide how to import segments for vmem_xalloc.
 * -	don't rely on malloc(9).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.24 2006/11/18 07:51:54 yamt Exp $");

#define	VMEM_DEBUG
#if defined(_KERNEL)
#define	QCACHE
#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/queue.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/once.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/vmem.h>
#else /* defined(_KERNEL) */
#include "../sys/vmem.h"
#endif /* defined(_KERNEL) */

#if defined(_KERNEL)
#define	SIMPLELOCK_DECL(name)	struct simplelock name
#else /* defined(_KERNEL) */
#include <errno.h>
#include <assert.h>
#include <stdlib.h>

#define	KASSERT(a)		assert(a)
#define	SIMPLELOCK_DECL(name)	/* nothing */
#define	LOCK_ASSERT(a)		/* nothing */
#define	simple_lock_init(a)	/* nothing */
#define	simple_lock(a)		/* nothing */
#define	simple_unlock(a)	/* nothing */
#define	ASSERT_SLEEPABLE(lk, msg) /* nothing */
#endif /* defined(_KERNEL) */

struct vmem;
struct vmem_btag;

#if defined(VMEM_DEBUG)
void vmem_dump(const vmem_t *);
#endif /* defined(VMEM_DEBUG) */

#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * CHAR_BIT)
#define	VMEM_HASHSIZE_INIT	4096	/* XXX */

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

#if defined(QCACHE)
#define	VMEM_QCACHE_IDX_MAX	32

#define	QC_NAME_MAX	16

struct qcache {
	struct pool qc_pool;
	struct pool_cache qc_cache;
	vmem_t *qc_vmem;
	char qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
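/*
 * QC_POOL_TO_QCACHE relies on qc_pool being the first member of
 * struct qcache: a pointer to the embedded pool is also a pointer
 * to the enclosing qcache.
 */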
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool))
#endif /* defined(QCACHE) */

/* vmem arena */
struct vmem {
	SIMPLELOCK_DECL(vm_lock);
	vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
	    vm_flag_t);
	void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
	vmem_t *vm_source;
	struct vmem_seglist vm_seglist;
	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
	size_t vm_hashsize;
	size_t vm_nbusytag;
	struct vmem_hashlist *vm_hashlist;
	size_t vm_quantum_mask;
	int vm_quantum_shift;
	const char *vm_name;

#if defined(QCACHE)
	/* quantum cache */
	size_t vm_qcache_max;
	struct pool_allocator vm_qcache_allocator;
	qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX];
	qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX];
#endif /* defined(QCACHE) */
};

#define	VMEM_LOCK(vm)	simple_lock(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)	simple_unlock(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm)	simple_lock_init(&vm->vm_lock);
#define	VMEM_ASSERT_LOCKED(vm) \
	LOCK_ASSERT(simple_lock_held(&vm->vm_lock))
#define	VMEM_ASSERT_UNLOCKED(vm) \
	LOCK_ASSERT(!simple_lock_held(&vm->vm_lock))

/* boundary tag */
struct vmem_btag {
	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t bt_start;
	vmem_size_t bt_size;
	int bt_type;
};

#define	BT_TYPE_SPAN		1
#define	BT_TYPE_SPAN_STATIC	2
#define	BT_TYPE_FREE		3
#define	BT_TYPE_BUSY		4
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size)

typedef struct vmem_btag bt_t;

/* ---- misc */

#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))
#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))

static int
calc_order(vmem_size_t size)
{
	vmem_size_t target;
	int i;

	KASSERT(size != 0);

	i = 0;
	target = size >> 1;
	while (ORDER2SIZE(i) <= target) {
		i++;
	}

	KASSERT(ORDER2SIZE(i) <= size);
	KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));

	return i;
}
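/*
 * calc_order returns floor(log2(size)); e.g. calc_order(1) == 0,
 * calc_order(7) == 2, calc_order(8) == 3.  vmem_create uses it to derive
 * vm_quantum_shift (and asserts that the quantum is an exact power of two),
 * and bt_freehead_tofree/bt_freehead_toalloc use it to pick a free list.
 */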
#if defined(_KERNEL)
static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
#endif /* defined(_KERNEL) */

static void *
xmalloc(size_t sz, vm_flag_t flags)
{

#if defined(_KERNEL)
	return malloc(sz, M_VMEM,
	    M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
#else /* defined(_KERNEL) */
	return malloc(sz);
#endif /* defined(_KERNEL) */
}

static void
xfree(void *p)
{

#if defined(_KERNEL)
	return free(p, M_VMEM);
#else /* defined(_KERNEL) */
	return free(p);
#endif /* defined(_KERNEL) */
}

/* ---- boundary tag */

#if defined(_KERNEL)
static struct pool_cache bt_poolcache;
static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
#endif /* defined(_KERNEL) */

static bt_t *
bt_alloc(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;

#if defined(_KERNEL)
	int s;

	/* XXX bootstrap */
	s = splvm();
	bt = pool_cache_get(&bt_poolcache,
	    (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
	splx(s);
#else /* defined(_KERNEL) */
	bt = malloc(sizeof *bt);
#endif /* defined(_KERNEL) */

	return bt;
}

static void
bt_free(vmem_t *vm, bt_t *bt)
{

#if defined(_KERNEL)
	int s;

	/* XXX bootstrap */
	s = splvm();
	pool_cache_put(&bt_poolcache, bt);
	splx(s);
#else /* defined(_KERNEL) */
	free(bt);
#endif /* defined(_KERNEL) */
}

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx;

	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(size != 0);

	idx = calc_order(qsize);
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx;

	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(size != 0);

	idx = calc_order(qsize);
	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	KASSERT(vm->vm_nbusytag > 0);
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}
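/*
 * every boundary tag is linked on vm_seglist; in addition, free tags sit
 * on one of the per-size vm_freelist[] heads and busy tags sit on the
 * vm_hashlist chain selected by hashing bt_start.  span tags live on the
 * segment list only.
 */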
/* ---- vmem internal functions */

#if defined(QCACHE)
static inline vm_flag_t
prf_to_vmf(int prflags)
{
	vm_flag_t vmflags;

	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
	if ((prflags & PR_WAITOK) != 0) {
		vmflags = VM_SLEEP;
	} else {
		vmflags = VM_NOSLEEP;
	}
	return vmflags;
}

static inline int
vmf_to_prf(vm_flag_t vmflags)
{
	int prflags;

	if ((vmflags & VM_SLEEP) != 0) {
		prflags = PR_WAITOK;
	} else {
		prflags = PR_NOWAIT;
	}
	return prflags;
}

static size_t
qc_poolpage_size(size_t qcache_max)
{
	int i;

	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
		/* nothing */
	}
	return ORDER2SIZE(i);
}

static void *
qc_poolpage_alloc(struct pool *pool, int prflags)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
	    prf_to_vmf(prflags) | VM_INSTANTFIT);
}

static void
qc_poolpage_free(struct pool *pool, void *addr)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
}

static void
qc_init(vmem_t *vm, size_t qcache_max)
{
	qcache_t *prevqc;
	struct pool_allocator *pa;
	int qcache_idx_max;
	int i;

	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
	}
	vm->vm_qcache_max = qcache_max;
	pa = &vm->vm_qcache_allocator;
	memset(pa, 0, sizeof(*pa));
	pa->pa_alloc = qc_poolpage_alloc;
	pa->pa_free = qc_poolpage_free;
	pa->pa_pagesz = qc_poolpage_size(qcache_max);

	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = qcache_idx_max; i > 0; i--) {
		qcache_t *qc = &vm->vm_qcache_store[i - 1];
		size_t size = i << vm->vm_quantum_shift;

		qc->qc_vmem = vm;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		pool_init(&qc->qc_pool, size, ORDER2SIZE(vm->vm_quantum_shift),
		    0, PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
		if (prevqc != NULL &&
		    qc->qc_pool.pr_itemsperpage ==
		    prevqc->qc_pool.pr_itemsperpage) {
			/* share the previous cache; don't use the destroyed pool */
			pool_destroy(&qc->qc_pool);
			vm->vm_qcache[i - 1] = prevqc;
			continue;
		}
		pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
		vm->vm_qcache[i - 1] = qc;
		prevqc = qc;
	}
}

static void
qc_destroy(vmem_t *vm)
{
	const qcache_t *prevqc;
	int i;
	int qcache_idx_max;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = 0; i < qcache_idx_max; i++) {
		qcache_t *qc = vm->vm_qcache[i];

		if (prevqc == qc) {
			continue;
		}
		pool_cache_destroy(&qc->qc_cache);
		pool_destroy(&qc->qc_pool);
		prevqc = qc;
	}
}

static boolean_t
qc_reap(vmem_t *vm)
{
	const qcache_t *prevqc;
	int i;
	int qcache_idx_max;
	boolean_t didsomething = FALSE;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = 0; i < qcache_idx_max; i++) {
		qcache_t *qc = vm->vm_qcache[i];

		if (prevqc == qc) {
			continue;
		}
		if (pool_reclaim(&qc->qc_pool) != 0) {
			didsomething = TRUE;
		}
		prevqc = qc;
	}

	return didsomething;
}
#endif /* defined(QCACHE) */

#if defined(_KERNEL)
static int
vmem_init(void)
{

	pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
	return 0;
}
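/*
 * vmem_init is run once, via RUN_ONCE() from vmem_create, and only sets up
 * the pool cache that backs bt_alloc/bt_free.
 */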
#endif /* defined(_KERNEL) */

static vmem_addr_t
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
    int spanbttype)
{
	bt_t *btspan;
	bt_t *btfree;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	VMEM_ASSERT_UNLOCKED(vm);

	btspan = bt_alloc(vm, flags);
	if (btspan == NULL) {
		return VMEM_ADDR_NULL;
	}
	btfree = bt_alloc(vm, flags);
	if (btfree == NULL) {
		bt_free(vm, btspan);
		return VMEM_ADDR_NULL;
	}

	btspan->bt_type = spanbttype;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	VMEM_LOCK(vm);
	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
	VMEM_UNLOCK(vm);

	return addr;
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
{
	vmem_addr_t addr;

	VMEM_ASSERT_UNLOCKED(vm);

	if (vm->vm_allocfn == NULL) {
		return EINVAL;
	}

	addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
	if (addr == VMEM_ADDR_NULL) {
		return ENOMEM;
	}

	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
		(*vm->vm_freefn)(vm->vm_source, addr, size);
		return ENOMEM;
	}

	return 0;
}

static int
vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	size_t oldhashsize;

	KASSERT(newhashsize > 0);
	VMEM_ASSERT_UNLOCKED(vm);

	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
	if (newhashlist == NULL) {
		return ENOMEM;
	}
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	xfree(oldhashlist);

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 */

static vmem_addr_t
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(bt->bt_size >= size);

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr - 1) {
		end = maxaddr - 1;
	}
	if (start >= end) {
		return VMEM_ADDR_NULL;
	}

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		KASSERT(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start < end && end - start >= size) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(start + size <= BT_END(bt));
		return start;
	}
	return VMEM_ADDR_NULL;
}

/* ---- vmem API */

/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum,
    vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
    void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
{
	vmem_t *vm;
	int i;
#if defined(_KERNEL)
	static ONCE_DECL(control);
#endif /* defined(_KERNEL) */

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

#if defined(_KERNEL)
	if (RUN_ONCE(&control, vmem_init)) {
		return NULL;
	}
#endif /* defined(_KERNEL) */
	vm = xmalloc(sizeof(*vm), flags);
	if (vm == NULL) {
		return NULL;
	}

	VMEM_LOCK_INIT(vm);
	vm->vm_name = name;
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = calc_order(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_allocfn = allocfn;
	vm->vm_freefn = freefn;
	vm->vm_source = source;
	vm->vm_nbusytag = 0;
#if defined(QCACHE)
	qc_init(vm, qcache_max);
#endif /* defined(QCACHE) */

	CIRCLEQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	vm->vm_hashlist = NULL;
	if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
		vmem_destroy(vm);
		return NULL;
	}

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) == 0) {
			vmem_destroy(vm);
			return NULL;
		}
	}

	return vm;
}

void
vmem_destroy(vmem_t *vm)
{

	VMEM_ASSERT_UNLOCKED(vm);

#if defined(QCACHE)
	qc_destroy(vm);
#endif /* defined(QCACHE) */
	if (vm->vm_hashlist != NULL) {
		int i;

		for (i = 0; i < vm->vm_hashsize; i++) {
			bt_t *bt;

			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
				bt_free(vm, bt);
			}
		}
		xfree(vm->vm_hashlist);
	}
	xfree(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}

/*
 * vmem_alloc:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_addr_t
vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
{
	const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
	const vm_flag_t strat __unused = flags & VM_FITMASK;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	VMEM_ASSERT_UNLOCKED(vm);

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE(NULL, __func__);
	}

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = size >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
		    vmf_to_prf(flags));
	}
#endif /* defined(QCACHE) */

	return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
}

vmem_addr_t
vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
    vm_flag_t flags)
{
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	bt_t *bt;
	bt_t *btnew;
	bt_t *btnew2;
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	vm_flag_t strat = flags & VM_FITMASK;
	vmem_addr_t start;

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE(NULL, __func__);
	}
	KASSERT((align & vm->vm_quantum_mask) == 0);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((phase & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & (nocross - 1)) == 0);
	KASSERT((align == 0 && phase == 0) || phase < align);
	KASSERT(nocross == 0 || nocross >= size);
	KASSERT(maxaddr == 0 || minaddr < maxaddr);
	KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	if (align == 0) {
		align = vm->vm_quantum_mask + 1;
	}
	btnew = bt_alloc(vm, flags);
	if (btnew == NULL) {
		return VMEM_ADDR_NULL;
	}
	btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
	if (btnew2 == NULL) {
		bt_free(vm, btnew);
		return VMEM_ADDR_NULL;
	}

retry_strat:
	first = bt_freehead_toalloc(vm, size, strat);
	end = &vm->vm_freelist[VMEM_MAXORDER];
retry:
	bt = NULL;
	VMEM_LOCK(vm);
	if (strat == VM_INSTANTFIT) {
		for (list = first; list < end; list++) {
			bt = LIST_FIRST(list);
			if (bt != NULL) {
				start = vmem_fit(bt, size, align, phase,
				    nocross, minaddr, maxaddr);
				if (start != VMEM_ADDR_NULL) {
					goto gotit;
				}
			}
		}
	} else { /* VM_BESTFIT */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					start = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr);
					if (start != VMEM_ADDR_NULL) {
						goto gotit;
					}
				}
			}
		}
	}
	VMEM_UNLOCK(vm);
#if 1
	if (strat == VM_INSTANTFIT) {
		strat = VM_BESTFIT;
		goto retry_strat;
	}
#endif
	if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
	    nocross != 0 || minaddr != 0 || maxaddr != 0) {

		/*
		 * XXX should try to import a region large enough to
		 * satisfy restrictions?
		 */

		goto fail;
	}
	if (vmem_import(vm, size, flags) == 0) {
		goto retry;
	}
	/* XXX */
fail:
	bt_free(vm, btnew);
	bt_free(vm, btnew2);
	return VMEM_ADDR_NULL;

gotit:
	KASSERT(bt->bt_type == BT_TYPE_FREE);
	KASSERT(bt->bt_size >= size);
	bt_remfree(vm, bt);
	if (bt->bt_start != start) {
		btnew2->bt_type = BT_TYPE_FREE;
		btnew2->bt_start = bt->bt_start;
		btnew2->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btnew2->bt_size;
		bt_insfree(vm, btnew2);
		bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
		btnew2 = NULL;
	}
	KASSERT(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
		bt_insbusy(vm, btnew);
		VMEM_UNLOCK(vm);
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
		VMEM_UNLOCK(vm);
		bt_free(vm, btnew);
		btnew = bt;
	}
	if (btnew2 != NULL) {
		bt_free(vm, btnew2);
	}
	KASSERT(btnew->bt_size >= size);
	btnew->bt_type = BT_TYPE_BUSY;

	return btnew->bt_start;
}
/*
 * vmem_free:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	VMEM_ASSERT_UNLOCKED(vm);
	KASSERT(addr != VMEM_ADDR_NULL);
	KASSERT(size > 0);

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		return pool_cache_put(&qc->qc_cache, (void *)addr);
	}
#endif /* defined(QCACHE) */

	vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;

	VMEM_ASSERT_UNLOCKED(vm);
	KASSERT(addr != VMEM_ADDR_NULL);
	KASSERT(size > 0);

	VMEM_LOCK(vm);

	bt = bt_lookupbusy(vm, addr);
	KASSERT(bt != NULL);
	KASSERT(bt->bt_start == addr);
	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	KASSERT(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = CIRCLEQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) == t->bt_start);
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt_free(vm, t);
	}
	t = CIRCLEQ_PREV(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) == bt->bt_start);
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		bt_free(vm, t);
	}

	t = CIRCLEQ_PREV(bt, bt_seglist);
	KASSERT(t != NULL);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		bt_free(vm, bt);
		bt_remseg(vm, t);
		bt_free(vm, t);
		VMEM_UNLOCK(vm);
		(*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_UNLOCK(vm);
	}
}

/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_addr_t
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
}

/*
 * vmem_reap: reap unused resources.
 *
 * => return TRUE if we successfully reaped something.
 */

boolean_t
vmem_reap(vmem_t *vm)
{
	boolean_t didsomething = FALSE;

	VMEM_ASSERT_UNLOCKED(vm);

#if defined(QCACHE)
	didsomething = qc_reap(vm);
#endif /* defined(QCACHE) */
	return didsomething;
}
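/*
 * vmem_reap only drains the quantum caches (when QCACHE is defined);
 * imported spans are returned to the source arena by vmem_xfree as soon
 * as they become entirely free.
 *
 * usage sketch (for illustration only; the arena name and numbers below
 * are arbitrary placeholders, not values used elsewhere in this file):
 *
 *	vmem_t *arena;
 *	vmem_addr_t va;
 *
 *	arena = vmem_create("example", 0x1000, 0x100000, PAGE_SIZE,
 *	    NULL, NULL, NULL, 0, VM_SLEEP);
 *	va = vmem_alloc(arena, 3 * PAGE_SIZE, VM_INSTANTFIT | VM_SLEEP);
 *	if (va != VMEM_ADDR_NULL) {
 *		...
 *		vmem_free(arena, va, 3 * PAGE_SIZE);
 *	}
 *	vmem_destroy(arena);
 */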
/* ---- debug */

#if defined(VMEM_DEBUG)

#if !defined(_KERNEL)
#include <stdio.h>
#endif /* !defined(_KERNEL) */

void bt_dump(const bt_t *);

void
bt_dump(const bt_t *bt)
{

	printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
	    bt->bt_type);
}

void
vmem_dump(const vmem_t *vm)
{
	const bt_t *bt;
	int i;

	printf("vmem %p '%s'\n", vm, vm->vm_name);
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		printf("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt);
			if (bt->bt_size) {
			}
		}
	}
}

#if !defined(_KERNEL)

#include <stdlib.h>

int
main()
{
	vmem_t *vm;
	vmem_addr_t p;
	struct reg {
		vmem_addr_t p;
		vmem_size_t sz;
		boolean_t x;
	} *reg = NULL;
	int nreg = 0;
	int nalloc = 0;
	int nfree = 0;
	vmem_size_t total = 0;
#if 1
	vm_flag_t strat = VM_INSTANTFIT;
#else
	vm_flag_t strat = VM_BESTFIT;
#endif

	vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
	    NULL, NULL, NULL, 0, VM_NOSLEEP);
	if (vm == NULL) {
		printf("vmem_create\n");
		exit(EXIT_FAILURE);
	}
	vmem_dump(vm);

	p = vmem_add(vm, 100, 200, VM_SLEEP);
	p = vmem_add(vm, 2000, 1, VM_SLEEP);
	p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
	p = vmem_add(vm, 10000, 10000, VM_SLEEP);
	p = vmem_add(vm, 500, 1000, VM_SLEEP);
	vmem_dump(vm);
	for (;;) {
		struct reg *r;
		int t = rand() % 100;

		if (t > 45) {
			/* alloc */
			vmem_size_t sz = rand() % 500 + 1;
			boolean_t x;
			vmem_size_t align, phase, nocross;
			vmem_addr_t minaddr, maxaddr;

			if (t > 70) {
				x = TRUE;
				/* XXX */
				align = 1 << (rand() % 15);
				phase = rand() % 65536;
				nocross = 1 << (rand() % 15);
				if (align <= phase) {
					phase = 0;
				}
				if (VMEM_CROSS_P(phase, phase + sz - 1,
				    nocross)) {
					nocross = 0;
				}
				minaddr = rand() % 50000;
				maxaddr = rand() % 70000;
				if (minaddr > maxaddr) {
					minaddr = 0;
					maxaddr = 0;
				}
				printf("=== xalloc %" PRIu64
				    " align=%" PRIu64 ", phase=%" PRIu64
				    ", nocross=%" PRIu64 ", min=%" PRIu64
				    ", max=%" PRIu64 "\n",
				    (uint64_t)sz,
				    (uint64_t)align,
				    (uint64_t)phase,
				    (uint64_t)nocross,
				    (uint64_t)minaddr,
				    (uint64_t)maxaddr);
				p = vmem_xalloc(vm, sz, align, phase, nocross,
				    minaddr, maxaddr, strat|VM_SLEEP);
			} else {
				x = FALSE;
				printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
				p = vmem_alloc(vm, sz, strat|VM_SLEEP);
			}
			printf("-> %" PRIu64 "\n", (uint64_t)p);
			vmem_dump(vm);
			if (p == VMEM_ADDR_NULL) {
				if (x) {
					continue;
				}
				break;
			}
			nreg++;
			reg = realloc(reg, sizeof(*reg) * nreg);
			r = &reg[nreg - 1];
			r->p = p;
			r->sz = sz;
			r->x = x;
			total += sz;
			nalloc++;
		} else if (nreg != 0) {
			/* free */
			r = &reg[rand() % nreg];
			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
			    (uint64_t)r->p, (uint64_t)r->sz);
			if (r->x) {
				vmem_xfree(vm, r->p, r->sz);
			} else {
				vmem_free(vm, r->p, r->sz);
			}
			total -= r->sz;
			vmem_dump(vm);
			*r = reg[nreg - 1];
			nreg--;
			nfree++;
		}
		printf("total=%" PRIu64 "\n", (uint64_t)total);
	}
	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
	    (uint64_t)total, nalloc, nfree);
	exit(EXIT_SUCCESS);
}
#endif /* !defined(_KERNEL) */
#endif /* defined(VMEM_DEBUG) */