/*	$NetBSD: subr_vmem.c,v 1.20 2006/11/09 10:08:53 yamt Exp $	*/

/*-
 * Copyright (c)2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 *
 * todo:
 * -	decide how to import segments for vmem_xalloc.
 * -	don't rely on malloc(9).
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.20 2006/11/09 10:08:53 yamt Exp $");

#define	VMEM_DEBUG
#if defined(_KERNEL)
#define	QCACHE
#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/queue.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/once.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/vmem.h>
#else /* defined(_KERNEL) */
#include "../sys/vmem.h"
#endif /* defined(_KERNEL) */

#if defined(_KERNEL)
#define	SIMPLELOCK_DECL(name)	struct simplelock name
#else /* defined(_KERNEL) */
#include <errno.h>
#include <assert.h>
#include <stdlib.h>

#define	KASSERT(a)		assert(a)
#define	SIMPLELOCK_DECL(name)	/* nothing */
#define	LOCK_ASSERT(a)		/* nothing */
#define	simple_lock_init(a)	/* nothing */
#define	simple_lock(a)		/* nothing */
#define	simple_unlock(a)	/* nothing */
#define	ASSERT_SLEEPABLE(lk, msg)	/* nothing */
#endif /* defined(_KERNEL) */

struct vmem;
struct vmem_btag;

#if defined(VMEM_DEBUG)
void vmem_dump(const vmem_t *);
#endif /* defined(VMEM_DEBUG) */

#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * CHAR_BIT)
#define	VMEM_HASHSIZE_INIT	4096	/* XXX */

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

#if defined(QCACHE)
#define	VMEM_QCACHE_IDX_MAX	32

#define	QC_NAME_MAX	16

struct qcache {
	struct pool qc_pool;
	struct pool_cache qc_cache;
	vmem_t *qc_vmem;
	char qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
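
/*
 * note: the QC_POOL_TO_QCACHE() cast below relies on qc_pool being the
 * first member of struct qcache, so that the struct pool pointer handed
 * to the pool allocator callbacks can be mapped back to its qcache.
 */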
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool))
#endif /* defined(QCACHE) */

/* vmem arena */
struct vmem {
	SIMPLELOCK_DECL(vm_lock);
	vmem_addr_t (*vm_allocfn)(vmem_t *, vmem_size_t, vmem_size_t *,
	    vm_flag_t);
	void (*vm_freefn)(vmem_t *, vmem_addr_t, vmem_size_t);
	vmem_t *vm_source;
	struct vmem_seglist vm_seglist;
	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
	size_t vm_hashsize;
	size_t vm_nbusytag;
	struct vmem_hashlist *vm_hashlist;
	size_t vm_quantum_mask;
	int vm_quantum_shift;
	const char *vm_name;

#if defined(QCACHE)
	/* quantum cache */
	size_t vm_qcache_max;
	struct pool_allocator vm_qcache_allocator;
	qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
#endif /* defined(QCACHE) */
};

#define	VMEM_LOCK(vm)	simple_lock(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)	simple_unlock(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm)	simple_lock_init(&vm->vm_lock);
#define	VMEM_ASSERT_LOCKED(vm) \
	LOCK_ASSERT(simple_lock_held(&vm->vm_lock))
#define	VMEM_ASSERT_UNLOCKED(vm) \
	LOCK_ASSERT(!simple_lock_held(&vm->vm_lock))

/* boundary tag */
struct vmem_btag {
	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t bt_start;
	vmem_size_t bt_size;
	int bt_type;
};

#define	BT_TYPE_SPAN		1
#define	BT_TYPE_SPAN_STATIC	2
#define	BT_TYPE_FREE		3
#define	BT_TYPE_BUSY		4
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size)

typedef struct vmem_btag bt_t;

/* ---- misc */

#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))
#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))

static int
calc_order(vmem_size_t size)
{
	vmem_size_t target;
	int i;

	KASSERT(size != 0);

	i = 0;
	target = size >> 1;
	while (ORDER2SIZE(i) <= target) {
		i++;
	}

	KASSERT(ORDER2SIZE(i) <= size);
	KASSERT(size < ORDER2SIZE(i + 1) || ORDER2SIZE(i + 1) < ORDER2SIZE(i));

	return i;
}

#if defined(_KERNEL)
static MALLOC_DEFINE(M_VMEM, "vmem", "vmem");
#endif /* defined(_KERNEL) */

static void *
xmalloc(size_t sz, vm_flag_t flags)
{

#if defined(_KERNEL)
	return malloc(sz, M_VMEM,
	    M_CANFAIL | ((flags & VM_SLEEP) ? M_WAITOK : M_NOWAIT));
#else /* defined(_KERNEL) */
	return malloc(sz);
#endif /* defined(_KERNEL) */
}

static void
xfree(void *p)
{

#if defined(_KERNEL)
	return free(p, M_VMEM);
#else /* defined(_KERNEL) */
	return free(p);
#endif /* defined(_KERNEL) */
}

/* ---- boundary tag */

#if defined(_KERNEL)
static struct pool_cache bt_poolcache;
static POOL_INIT(bt_pool, sizeof(bt_t), 0, 0, 0, "vmembtpl", NULL);
#endif /* defined(_KERNEL) */

static bt_t *
bt_alloc(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;

#if defined(_KERNEL)
	/* XXX bootstrap */
	bt = pool_cache_get(&bt_poolcache,
	    (flags & VM_SLEEP) != 0 ? PR_WAITOK : PR_NOWAIT);
#else /* defined(_KERNEL) */
	bt = malloc(sizeof *bt);
#endif /* defined(_KERNEL) */

	return bt;
}

static void
bt_free(vmem_t *vm, bt_t *bt)
{

#if defined(_KERNEL)
	/* XXX bootstrap */
	pool_cache_put(&bt_poolcache, bt);
#else /* defined(_KERNEL) */
	free(bt);
#endif /* defined(_KERNEL) */
}

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx;

	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(size != 0);

	idx = calc_order(qsize);
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx;

	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(size != 0);

	idx = calc_order(qsize);
	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	KASSERT(vm->vm_nbusytag > 0);
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

#if defined(QCACHE)
static inline vm_flag_t
prf_to_vmf(int prflags)
{
	vm_flag_t vmflags;

	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
	if ((prflags & PR_WAITOK) != 0) {
		vmflags = VM_SLEEP;
	} else {
		vmflags = VM_NOSLEEP;
	}
	return vmflags;
}

static inline int
vmf_to_prf(vm_flag_t vmflags)
{
	int prflags;

	if ((vmflags & VM_SLEEP) != 0) {
		prflags = PR_WAITOK;
	} else {
		prflags = PR_NOWAIT;
	}
	return prflags;
}

static size_t
qc_poolpage_size(size_t qcache_max)
{
	int i;

	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
		/* nothing */
	}
	return ORDER2SIZE(i);
}

static void *
qc_poolpage_alloc(struct pool *pool, int prflags)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	return (void *)vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
	    prf_to_vmf(prflags) | VM_INSTANTFIT);
}

static void
qc_poolpage_free(struct pool *pool, void *addr)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
}

static void
qc_init(vmem_t *vm, size_t qcache_max)
{
	struct pool_allocator *pa;
	int qcache_idx_max;
	int i;

	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
	}
	vm->vm_qcache_max = qcache_max;
	pa = &vm->vm_qcache_allocator;
	memset(pa, 0, sizeof(*pa));
	pa->pa_alloc = qc_poolpage_alloc;
	pa->pa_free = qc_poolpage_free;
	pa->pa_pagesz = qc_poolpage_size(qcache_max);

	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
	for (i = 1; i <= qcache_idx_max; i++) {
		qcache_t *qc = &vm->vm_qcache[i - 1];
		size_t size = i << vm->vm_quantum_shift;

		qc->qc_vmem = vm;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		pool_init(&qc->qc_pool, size, ORDER2SIZE(vm->vm_quantum_shift),
		    0, PR_NOALIGN | PR_NOTOUCH /* XXX */, qc->qc_name, pa);
		pool_cache_init(&qc->qc_cache, &qc->qc_pool, NULL, NULL, NULL);
	}
}

static boolean_t
qc_reap(vmem_t *vm)
{
	int i;
	int qcache_idx_max;
	boolean_t didsomething = FALSE;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 1; i <= qcache_idx_max; i++) {
		qcache_t *qc = &vm->vm_qcache[i - 1];

		if (pool_reclaim(&qc->qc_pool) != 0) {
			didsomething = TRUE;
		}
	}

	return didsomething;
}
#endif /* defined(QCACHE) */

#if defined(_KERNEL)
static int
vmem_init(void)
{

	pool_cache_init(&bt_poolcache, &bt_pool, NULL, NULL, NULL);
	return 0;
}
#endif /* defined(_KERNEL) */

static vmem_addr_t
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
    int spanbttype)
{
	bt_t *btspan;
	bt_t *btfree;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	VMEM_ASSERT_UNLOCKED(vm);

	btspan = bt_alloc(vm, flags);
	if (btspan == NULL) {
		return VMEM_ADDR_NULL;
	}
	btfree = bt_alloc(vm, flags);
	if (btfree == NULL) {
		bt_free(vm, btspan);
		return VMEM_ADDR_NULL;
	}

	btspan->bt_type = spanbttype;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	VMEM_LOCK(vm);
	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
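	/*
	 * the new span is now represented by two tags: a span marker
	 * and a single free segment covering the whole span.
	 */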
	VMEM_UNLOCK(vm);

	return addr;
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
{
	vmem_addr_t addr;

	VMEM_ASSERT_UNLOCKED(vm);

	if (vm->vm_allocfn == NULL) {
		return EINVAL;
	}

	addr = (*vm->vm_allocfn)(vm->vm_source, size, &size, flags);
	if (addr == VMEM_ADDR_NULL) {
		return ENOMEM;
	}

	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) == VMEM_ADDR_NULL) {
		(*vm->vm_freefn)(vm->vm_source, addr, size);
		return ENOMEM;
	}

	return 0;
}

static int
vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	size_t oldhashsize;

	KASSERT(newhashsize > 0);
	VMEM_ASSERT_UNLOCKED(vm);

	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
	if (newhashlist == NULL) {
		return ENOMEM;
	}
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	xfree(oldhashlist);

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 */

static vmem_addr_t
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(bt->bt_size >= size);

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr - 1) {
		end = maxaddr - 1;
	}
	if (start >= end) {
		return VMEM_ADDR_NULL;
	}

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		KASSERT(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start < end && end - start >= size) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(start + size <= BT_END(bt));
		return start;
	}
	return VMEM_ADDR_NULL;
}

/* ---- vmem API */

/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
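 *
 * => usage sketch (mirrors the self-test at the bottom of this file;
 *    the arena starts empty and spans are added later with vmem_add):
 *	vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
 *	    NULL, NULL, NULL, 0, VM_NOSLEEP);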
 */

vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum,
    vmem_addr_t (*allocfn)(vmem_t *, vmem_size_t, vmem_size_t *, vm_flag_t),
    void (*freefn)(vmem_t *, vmem_addr_t, vmem_size_t),
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags)
{
	vmem_t *vm;
	int i;
#if defined(_KERNEL)
	static ONCE_DECL(control);
#endif /* defined(_KERNEL) */

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

#if defined(_KERNEL)
	if (RUN_ONCE(&control, vmem_init)) {
		return NULL;
	}
#endif /* defined(_KERNEL) */
	vm = xmalloc(sizeof(*vm), flags);
	if (vm == NULL) {
		return NULL;
	}

	VMEM_LOCK_INIT(vm);
	vm->vm_name = name;
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = calc_order(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_allocfn = allocfn;
	vm->vm_freefn = freefn;
	vm->vm_source = source;
	vm->vm_nbusytag = 0;
#if defined(QCACHE)
	qc_init(vm, qcache_max);
#endif /* defined(QCACHE) */

	CIRCLEQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	vm->vm_hashlist = NULL;
	if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
		vmem_destroy(vm);
		return NULL;
	}

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) == 0) {
			vmem_destroy(vm);
			return NULL;
		}
	}

	return vm;
}

void
vmem_destroy(vmem_t *vm)
{

	VMEM_ASSERT_UNLOCKED(vm);

	if (vm->vm_hashlist != NULL) {
		int i;

		for (i = 0; i < vm->vm_hashsize; i++) {
			bt_t *bt;

			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
				bt_free(vm, bt);
			}
		}
		xfree(vm->vm_hashlist);
	}
	xfree(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}

/*
 * vmem_alloc:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
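 *
 * => e.g. (as in the self-test at the bottom of this file):
 *	addr = vmem_alloc(vm, sz, VM_INSTANTFIT | VM_SLEEP);
 *	...
 *	vmem_free(vm, addr, sz);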
 */

vmem_addr_t
vmem_alloc(vmem_t *vm, vmem_size_t size0, vm_flag_t flags)
{
	const vmem_size_t size __unused = vmem_roundup_size(vm, size0);
	const vm_flag_t strat __unused = flags & VM_FITMASK;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	VMEM_ASSERT_UNLOCKED(vm);

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE(NULL, __func__);
	}

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = size >> vm->vm_quantum_shift;
		qcache_t *qc = &vm->vm_qcache[qidx - 1];

		return (vmem_addr_t)pool_cache_get(&qc->qc_cache,
		    vmf_to_prf(flags));
	}
#endif /* defined(QCACHE) */

	return vmem_xalloc(vm, size0, 0, 0, 0, 0, 0, flags);
}

vmem_addr_t
vmem_xalloc(vmem_t *vm, vmem_size_t size0, vmem_size_t align, vmem_size_t phase,
    vmem_size_t nocross, vmem_addr_t minaddr, vmem_addr_t maxaddr,
    vm_flag_t flags)
{
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	bt_t *bt;
	bt_t *btnew;
	bt_t *btnew2;
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	vm_flag_t strat = flags & VM_FITMASK;
	vmem_addr_t start;

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE(NULL, __func__);
	}
	KASSERT((align & vm->vm_quantum_mask) == 0);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((phase & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & (nocross - 1)) == 0);
	KASSERT((align == 0 && phase == 0) || phase < align);
	KASSERT(nocross == 0 || nocross >= size);
	KASSERT(maxaddr == 0 || minaddr < maxaddr);
	KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	if (align == 0) {
		align = vm->vm_quantum_mask + 1;
	}
	btnew = bt_alloc(vm, flags);
	if (btnew == NULL) {
		return VMEM_ADDR_NULL;
	}
	btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
	if (btnew2 == NULL) {
		bt_free(vm, btnew);
		return VMEM_ADDR_NULL;
	}

retry_strat:
	first = bt_freehead_toalloc(vm, size, strat);
	end = &vm->vm_freelist[VMEM_MAXORDER];
retry:
	bt = NULL;
	VMEM_LOCK(vm);
	if (strat == VM_INSTANTFIT) {
		for (list = first; list < end; list++) {
			bt = LIST_FIRST(list);
			if (bt != NULL) {
				start = vmem_fit(bt, size, align, phase,
				    nocross, minaddr, maxaddr);
				if (start != VMEM_ADDR_NULL) {
					goto gotit;
				}
			}
		}
	} else { /* VM_BESTFIT */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					start = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr);
					if (start != VMEM_ADDR_NULL) {
						goto gotit;
					}
				}
			}
		}
	}
	VMEM_UNLOCK(vm);
#if 1
	if (strat == VM_INSTANTFIT) {
		strat = VM_BESTFIT;
		goto retry_strat;
	}
#endif
	if (align != vm->vm_quantum_mask + 1 || phase != 0 ||
	    nocross != 0 || minaddr != 0 || maxaddr != 0) {

		/*
		 * XXX should try to import a region large enough to
		 * satisfy restrictions?
		 */

		goto fail;
	}
	if (vmem_import(vm, size, flags) == 0) {
		goto retry;
	}
	/* XXX */
fail:
	bt_free(vm, btnew);
	bt_free(vm, btnew2);
	return VMEM_ADDR_NULL;

gotit:
	KASSERT(bt->bt_type == BT_TYPE_FREE);
	KASSERT(bt->bt_size >= size);
	bt_remfree(vm, bt);
	if (bt->bt_start != start) {
		btnew2->bt_type = BT_TYPE_FREE;
		btnew2->bt_start = bt->bt_start;
		btnew2->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btnew2->bt_size;
		bt_insfree(vm, btnew2);
		bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
		btnew2 = NULL;
	}
	KASSERT(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
		bt_insbusy(vm, btnew);
		VMEM_UNLOCK(vm);
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
		VMEM_UNLOCK(vm);
		bt_free(vm, btnew);
		btnew = bt;
	}
	if (btnew2 != NULL) {
		bt_free(vm, btnew2);
	}
	KASSERT(btnew->bt_size >= size);
	btnew->bt_type = BT_TYPE_BUSY;

	return btnew->bt_start;
}

/*
 * vmem_free:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	VMEM_ASSERT_UNLOCKED(vm);
	KASSERT(addr != VMEM_ADDR_NULL);
	KASSERT(size > 0);

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = &vm->vm_qcache[qidx - 1];

		return pool_cache_put(&qc->qc_cache, (void *)addr);
	}
#endif /* defined(QCACHE) */

	vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;

	VMEM_ASSERT_UNLOCKED(vm);
	KASSERT(addr != VMEM_ADDR_NULL);
	KASSERT(size > 0);

	VMEM_LOCK(vm);

	bt = bt_lookupbusy(vm, addr);
	KASSERT(bt != NULL);
	KASSERT(bt->bt_start == addr);
	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	KASSERT(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = CIRCLEQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) == t->bt_start);
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt_free(vm, t);
	}
	t = CIRCLEQ_PREV(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) == bt->bt_start);
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		bt_free(vm, t);
	}

	t = CIRCLEQ_PREV(bt, bt_seglist);
	KASSERT(t != NULL);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_freefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		bt_free(vm, bt);
		bt_remseg(vm, t);
		bt_free(vm, t);
		VMEM_UNLOCK(vm);
		(*vm->vm_freefn)(vm->vm_source, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_UNLOCK(vm);
	}
}

/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

vmem_addr_t
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
}

/*
 * vmem_reap: reap unused resources.
 *
 * => return TRUE if we successfully reaped something.
 */

boolean_t
vmem_reap(vmem_t *vm)
{
	boolean_t didsomething = FALSE;

	VMEM_ASSERT_UNLOCKED(vm);

#if defined(QCACHE)
	didsomething = qc_reap(vm);
#endif /* defined(QCACHE) */
	return didsomething;
}

/* ---- debug */

#if defined(VMEM_DEBUG)

#if !defined(_KERNEL)
#include <stdio.h>
#endif /* !defined(_KERNEL) */

void bt_dump(const bt_t *);

void
bt_dump(const bt_t *bt)
{

	printf("\t%p: %" PRIu64 ", %" PRIu64 ", %d\n",
	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
	    bt->bt_type);
}

void
vmem_dump(const vmem_t *vm)
{
	const bt_t *bt;
	int i;

	printf("vmem %p '%s'\n", vm, vm->vm_name);
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		printf("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt);
			if (bt->bt_size) {
			}
		}
	}
}

#if !defined(_KERNEL)

#include <stdlib.h>

int
main()
{
	vmem_t *vm;
	vmem_addr_t p;
	struct reg {
		vmem_addr_t p;
		vmem_size_t sz;
		boolean_t x;
	} *reg = NULL;
	int nreg = 0;
	int nalloc = 0;
	int nfree = 0;
	vmem_size_t total = 0;
#if 1
	vm_flag_t strat = VM_INSTANTFIT;
#else
	vm_flag_t strat = VM_BESTFIT;
#endif

	vm = vmem_create("test", VMEM_ADDR_NULL, 0, 1,
	    NULL, NULL, NULL, 0, VM_NOSLEEP);
	if (vm == NULL) {
		printf("vmem_create\n");
		exit(EXIT_FAILURE);
	}
	vmem_dump(vm);

	p = vmem_add(vm, 100, 200, VM_SLEEP);
	p = vmem_add(vm, 2000, 1, VM_SLEEP);
	p = vmem_add(vm, 40000, 0x10000000>>12, VM_SLEEP);
	p = vmem_add(vm, 10000, 10000, VM_SLEEP);
	p = vmem_add(vm, 500, 1000, VM_SLEEP);
	vmem_dump(vm);
	for (;;) {
		struct reg *r;
		int t = rand() % 100;

		if (t > 45) {
			/* alloc */
			vmem_size_t sz = rand() % 500 + 1;
			boolean_t x;
			vmem_size_t align, phase, nocross;
			vmem_addr_t minaddr, maxaddr;

			if (t > 70) {
				x = TRUE;
				/* XXX */
				align = 1 << (rand() % 15);
				phase = rand() % 65536;
				nocross = 1 << (rand() % 15);
				if (align <= phase) {
					phase = 0;
				}
				if (VMEM_CROSS_P(phase, phase + sz - 1,
				    nocross)) {
					nocross = 0;
				}
				minaddr = rand() % 50000;
				maxaddr = rand() % 70000;
				if (minaddr > maxaddr) {
					minaddr = 0;
					maxaddr = 0;
				}
				printf("=== xalloc %" PRIu64
				    " align=%" PRIu64 ", phase=%" PRIu64
				    ", nocross=%" PRIu64 ", min=%" PRIu64
				    ", max=%" PRIu64 "\n",
				    (uint64_t)sz,
				    (uint64_t)align,
				    (uint64_t)phase,
				    (uint64_t)nocross,
				    (uint64_t)minaddr,
				    (uint64_t)maxaddr);
				p = vmem_xalloc(vm, sz, align, phase, nocross,
				    minaddr, maxaddr, strat|VM_SLEEP);
			} else {
				x = FALSE;
				printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
				p = vmem_alloc(vm, sz, strat|VM_SLEEP);
			}
			printf("-> %" PRIu64 "\n", (uint64_t)p);
			vmem_dump(vm);
			if (p == VMEM_ADDR_NULL) {
				if (x) {
					continue;
				}
				break;
			}
			nreg++;
			reg = realloc(reg, sizeof(*reg) * nreg);
			r = &reg[nreg - 1];
			r->p = p;
			r->sz = sz;
			r->x = x;
			total += sz;
			nalloc++;
		} else if (nreg != 0) {
			/* free */
			r = &reg[rand() % nreg];
			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
			    (uint64_t)r->p, (uint64_t)r->sz);
			if (r->x) {
				vmem_xfree(vm, r->p, r->sz);
			} else {
				vmem_free(vm, r->p, r->sz);
			}
			total -= r->sz;
			vmem_dump(vm);
			*r = reg[nreg - 1];
			nreg--;
			nfree++;
		}
		printf("total=%" PRIu64 "\n", (uint64_t)total);
	}
	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
	    (uint64_t)total, nalloc, nfree);
	exit(EXIT_SUCCESS);
}
#endif /* !defined(_KERNEL) */
#endif /* defined(VMEM_DEBUG) */
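
/*
 * note: when this file is built without -D_KERNEL, the VMEM_DEBUG block
 * above provides a standalone main() that exercises vmem_add/vmem_alloc/
 * vmem_xalloc/vmem_free against a private arena; it expects a userland
 * vmem.h reachable as ../sys/vmem.h relative to this file.
 */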