1 /* $NetBSD: subr_vmem.c,v 1.118 2024/12/06 19:17:59 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * reference: 31 * - Magazines and Vmem: Extending the Slab Allocator 32 * to Many CPUs and Arbitrary Resources 33 * http://www.usenix.org/event/usenix01/bonwick.html 34 * 35 * locking & the boundary tag pool: 36 * - A pool(9) is used for vmem boundary tags 37 * - During a pool get call the global vmem_btag_refill_lock is taken, 38 * to serialize access to the allocation reserve, but no other 39 * vmem arena locks. 40 * - During pool_put calls no vmem mutexes are locked. 41 * - pool_drain doesn't hold the pool's mutex while releasing memory to 42 * its backing therefore no interference with any vmem mutexes. 43 * - The boundary tag pool is forced to put page headers into pool pages 44 * (PR_PHINPAGE) and not off page to avoid pool recursion. 
45 * (due to sizeof(bt_t) it should be the case anyway) 46 */ 47 48 #include <sys/cdefs.h> 49 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.118 2024/12/06 19:17:59 riastradh Exp $"); 50 51 #if defined(_KERNEL) && defined(_KERNEL_OPT) 52 #include "opt_ddb.h" 53 #endif /* defined(_KERNEL) && defined(_KERNEL_OPT) */ 54 55 #include <sys/param.h> 56 #include <sys/types.h> 57 58 #include <sys/bitops.h> 59 #include <sys/hash.h> 60 #include <sys/queue.h> 61 62 #if defined(_KERNEL) 63 64 #include <sys/atomic.h> 65 #include <sys/callout.h> 66 #include <sys/kernel.h> /* hz */ 67 #include <sys/kmem.h> 68 #include <sys/pool.h> 69 #include <sys/sdt.h> 70 #include <sys/systm.h> 71 #include <sys/vmem.h> 72 #include <sys/vmem_impl.h> 73 #include <sys/workqueue.h> 74 75 #include <uvm/uvm.h> 76 #include <uvm/uvm_extern.h> 77 #include <uvm/uvm_km.h> 78 #include <uvm/uvm_page.h> 79 #include <uvm/uvm_pdaemon.h> 80 81 #else /* defined(_KERNEL) */ 82 83 #include <assert.h> 84 #include <errno.h> 85 #include <stdio.h> 86 #include <stdlib.h> 87 #include <string.h> 88 89 #include "../sys/vmem.h" 90 #include "../sys/vmem_impl.h" 91 92 #define SET_ERROR(E) (E) 93 94 #endif /* defined(_KERNEL) */ 95 96 #if defined(_KERNEL) 97 98 #include <sys/evcnt.h> 99 100 #define VMEM_EVCNT_DEFINE(name) \ 101 struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 102 "vmem", #name); \ 103 EVCNT_ATTACH_STATIC(vmem_evcnt_##name) 104 #define VMEM_EVCNT_INCR(ev) (vmem_evcnt_##ev.ev_count++) 105 #define VMEM_EVCNT_DECR(ev) (vmem_evcnt_##ev.ev_count--) 106 107 VMEM_EVCNT_DEFINE(static_bt_count); 108 VMEM_EVCNT_DEFINE(static_bt_inuse); 109 110 #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan) 111 #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv) 112 #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock) 113 #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv) 114 115 #else /* defined(_KERNEL) */ 116 117 #define VMEM_EVCNT_INCR(ev) __nothing 118 #define VMEM_EVCNT_DECR(ev) __nothing 119 120 #define VMEM_CONDVAR_INIT(vm, wchan) __nothing 121 #define VMEM_CONDVAR_DESTROY(vm) __nothing 122 #define VMEM_CONDVAR_WAIT(vm) __nothing 123 #define VMEM_CONDVAR_BROADCAST(vm) __nothing 124 125 #define UNITTEST 126 #define KASSERT(a) assert(a) 127 #define KASSERTMSG(a, m, ...) assert(a) 128 #define mutex_init(a, b, c) __nothing 129 #define mutex_destroy(a) __nothing 130 #define mutex_enter(a) __nothing 131 #define mutex_tryenter(a) true 132 #define mutex_exit(a) __nothing 133 #define mutex_owned(a) true 134 #define ASSERT_SLEEPABLE() __nothing 135 #define panic(...) 
(printf(__VA_ARGS__), abort()) 136 137 #endif /* defined(_KERNEL) */ 138 139 #if defined(VMEM_SANITY) 140 static void vmem_check(vmem_t *); 141 #else /* defined(VMEM_SANITY) */ 142 #define vmem_check(vm) __nothing 143 #endif /* defined(VMEM_SANITY) */ 144 145 #define VMEM_HASHSIZE_MIN 1 /* XXX */ 146 #define VMEM_HASHSIZE_MAX 65536 /* XXX */ 147 #define VMEM_HASHSIZE_INIT 1 148 149 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT) 150 151 #if defined(_KERNEL) 152 static bool vmem_bootstrapped = false; 153 static kmutex_t vmem_list_lock; 154 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); 155 #endif /* defined(_KERNEL) */ 156 157 /* ---- misc */ 158 159 #define VMEM_LOCK(vm) mutex_enter(&(vm)->vm_lock) 160 #define VMEM_TRYLOCK(vm) mutex_tryenter(&(vm)->vm_lock) 161 #define VMEM_UNLOCK(vm) mutex_exit(&(vm)->vm_lock) 162 #define VMEM_LOCK_INIT(vm, ipl) mutex_init(&(vm)->vm_lock, MUTEX_DEFAULT, (ipl)) 163 #define VMEM_LOCK_DESTROY(vm) mutex_destroy(&(vm)->vm_lock) 164 #define VMEM_ASSERT_LOCKED(vm) KASSERT(mutex_owned(&(vm)->vm_lock)) 165 166 #define VMEM_ALIGNUP(addr, align) \ 167 (-(-(addr) & -(align))) 168 169 #define VMEM_CROSS_P(addr1, addr2, boundary) \ 170 ((((addr1) ^ (addr2)) & -(boundary)) != 0) 171 172 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order)) 173 #define SIZE2ORDER(size) ((int)ilog2(size)) 174 175 static void 176 vmem_kick_pdaemon(void) 177 { 178 #if defined(_KERNEL) 179 uvm_kick_pdaemon(); 180 #endif 181 } 182 183 static void vmem_xfree_bt(vmem_t *, bt_t *); 184 185 #if !defined(_KERNEL) 186 #define xmalloc(sz, flags) malloc(sz) 187 #define xfree(p, sz) free(p) 188 #define bt_alloc(vm, flags) malloc(sizeof(bt_t)) 189 #define bt_free(vm, bt) free(bt) 190 #define bt_freetrim(vm, l) __nothing 191 #else /* defined(_KERNEL) */ 192 193 #define xmalloc(sz, flags) \ 194 kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP); 195 #define xfree(p, sz) kmem_free(p, sz); 196 197 /* 198 * BT_RESERVE calculation: 199 * we allocate memory for boundary tags with vmem; therefore we have 200 * to keep a reserve of bts used to allocated memory for bts. 201 * This reserve is 4 for each arena involved in allocating vmems memory. 202 * BT_MAXFREE: don't cache excessive counts of bts in arenas 203 */ 204 #define STATIC_BT_COUNT 200 205 #define BT_MINRESERVE 4 206 #define BT_MAXFREE 64 207 208 static struct vmem_btag static_bts[STATIC_BT_COUNT]; 209 static int static_bt_count = STATIC_BT_COUNT; 210 211 static struct vmem kmem_va_meta_arena_store; 212 vmem_t *kmem_va_meta_arena; 213 static struct vmem kmem_meta_arena_store; 214 vmem_t *kmem_meta_arena = NULL; 215 216 static kmutex_t vmem_btag_refill_lock; 217 static kmutex_t vmem_btag_lock; 218 static LIST_HEAD(, vmem_btag) vmem_btag_freelist; 219 static size_t vmem_btag_freelist_count = 0; 220 static struct pool vmem_btag_pool; 221 static bool vmem_btag_pool_initialized __read_mostly; 222 223 /* ---- boundary tag */ 224 225 static int bt_refill(vmem_t *vm); 226 static int bt_refill_locked(vmem_t *vm); 227 228 static void * 229 pool_page_alloc_vmem_meta(struct pool *pp, int flags) 230 { 231 const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 232 vmem_addr_t va; 233 int ret; 234 235 ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz, 236 (vflags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va); 237 238 return ret ? 
NULL : (void *)va; 239 } 240 241 static void 242 pool_page_free_vmem_meta(struct pool *pp, void *v) 243 { 244 245 vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz); 246 } 247 248 /* allocator for vmem-pool metadata */ 249 struct pool_allocator pool_allocator_vmem_meta = { 250 .pa_alloc = pool_page_alloc_vmem_meta, 251 .pa_free = pool_page_free_vmem_meta, 252 .pa_pagesz = 0 253 }; 254 255 static int 256 bt_refill_locked(vmem_t *vm) 257 { 258 bt_t *bt; 259 260 VMEM_ASSERT_LOCKED(vm); 261 262 if (vm->vm_nfreetags > BT_MINRESERVE) { 263 return 0; 264 } 265 266 mutex_enter(&vmem_btag_lock); 267 while (!LIST_EMPTY(&vmem_btag_freelist) && 268 vm->vm_nfreetags <= BT_MINRESERVE && 269 (vm->vm_flags & VM_PRIVTAGS) == 0) { 270 bt = LIST_FIRST(&vmem_btag_freelist); 271 LIST_REMOVE(bt, bt_freelist); 272 bt->bt_flags = 0; 273 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 274 vm->vm_nfreetags++; 275 vmem_btag_freelist_count--; 276 VMEM_EVCNT_INCR(static_bt_inuse); 277 } 278 mutex_exit(&vmem_btag_lock); 279 280 while (vm->vm_nfreetags <= BT_MINRESERVE) { 281 VMEM_UNLOCK(vm); 282 KASSERT(vmem_btag_pool_initialized); 283 mutex_enter(&vmem_btag_refill_lock); 284 bt = pool_get(&vmem_btag_pool, PR_NOWAIT); 285 mutex_exit(&vmem_btag_refill_lock); 286 VMEM_LOCK(vm); 287 if (bt == NULL) 288 break; 289 bt->bt_flags = 0; 290 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 291 vm->vm_nfreetags++; 292 } 293 294 if (vm->vm_nfreetags <= BT_MINRESERVE) { 295 return SET_ERROR(ENOMEM); 296 } 297 298 if (kmem_meta_arena != NULL) { 299 VMEM_UNLOCK(vm); 300 (void)bt_refill(kmem_arena); 301 (void)bt_refill(kmem_va_meta_arena); 302 (void)bt_refill(kmem_meta_arena); 303 VMEM_LOCK(vm); 304 } 305 306 return 0; 307 } 308 309 static int 310 bt_refill(vmem_t *vm) 311 { 312 int rv; 313 314 VMEM_LOCK(vm); 315 rv = bt_refill_locked(vm); 316 VMEM_UNLOCK(vm); 317 return rv; 318 } 319 320 static bt_t * 321 bt_alloc(vmem_t *vm, vm_flag_t flags) 322 { 323 bt_t *bt; 324 325 VMEM_ASSERT_LOCKED(vm); 326 327 while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) { 328 if (bt_refill_locked(vm)) { 329 if ((flags & VM_NOSLEEP) != 0) { 330 return NULL; 331 } 332 333 /* 334 * It would be nice to wait for something specific here 335 * but there are multiple ways that a retry could 336 * succeed and we can't wait for multiple things 337 * simultaneously. So we'll just sleep for an arbitrary 338 * short period of time and retry regardless. 339 * This should be a very rare case. 
340 */ 341 342 vmem_kick_pdaemon(); 343 kpause("btalloc", false, 1, &vm->vm_lock); 344 } 345 } 346 bt = LIST_FIRST(&vm->vm_freetags); 347 LIST_REMOVE(bt, bt_freelist); 348 vm->vm_nfreetags--; 349 350 return bt; 351 } 352 353 static void 354 bt_free(vmem_t *vm, bt_t *bt) 355 { 356 357 VMEM_ASSERT_LOCKED(vm); 358 359 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 360 vm->vm_nfreetags++; 361 } 362 363 static void 364 bt_freetrim(vmem_t *vm, int freelimit) 365 { 366 bt_t *bt, *next_bt; 367 LIST_HEAD(, vmem_btag) tofree; 368 369 VMEM_ASSERT_LOCKED(vm); 370 371 LIST_INIT(&tofree); 372 373 LIST_FOREACH_SAFE(bt, &vm->vm_freetags, bt_freelist, next_bt) { 374 if (vm->vm_nfreetags <= freelimit) { 375 break; 376 } 377 if (bt->bt_flags & BT_F_PRIVATE) { 378 continue; 379 } 380 LIST_REMOVE(bt, bt_freelist); 381 vm->vm_nfreetags--; 382 if (bt >= static_bts 383 && bt < &static_bts[STATIC_BT_COUNT]) { 384 mutex_enter(&vmem_btag_lock); 385 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 386 vmem_btag_freelist_count++; 387 mutex_exit(&vmem_btag_lock); 388 VMEM_EVCNT_DECR(static_bt_inuse); 389 } else { 390 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 391 } 392 } 393 394 VMEM_UNLOCK(vm); 395 while (!LIST_EMPTY(&tofree)) { 396 bt = LIST_FIRST(&tofree); 397 LIST_REMOVE(bt, bt_freelist); 398 pool_put(&vmem_btag_pool, bt); 399 } 400 } 401 402 /* 403 * Add private boundary tags (statically-allocated by the caller) 404 * to a vmem arena's free tag list. 405 */ 406 void 407 vmem_add_bts(vmem_t *vm, struct vmem_btag *bts, unsigned int nbts) 408 { 409 VMEM_LOCK(vm); 410 while (nbts != 0) { 411 bts->bt_flags = BT_F_PRIVATE; 412 LIST_INSERT_HEAD(&vm->vm_freetags, bts, bt_freelist); 413 vm->vm_nfreetags++; 414 bts++; 415 nbts--; 416 } 417 VMEM_UNLOCK(vm); 418 } 419 #endif /* defined(_KERNEL) */ 420 421 /* 422 * freelist[0] ... [1, 1] 423 * freelist[1] ... [2, 3] 424 * freelist[2] ... [4, 7] 425 * freelist[3] ... [8, 15] 426 * : 427 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1] 428 * : 429 */ 430 431 static struct vmem_freelist * 432 bt_freehead_tofree(vmem_t *vm, vmem_size_t size) 433 { 434 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 435 const int idx = SIZE2ORDER(qsize); 436 437 KASSERT(size != 0); 438 KASSERT(qsize != 0); 439 KASSERT((size & vm->vm_quantum_mask) == 0); 440 KASSERT(idx >= 0); 441 KASSERT(idx < VMEM_MAXORDER); 442 443 return &vm->vm_freelist[idx]; 444 } 445 446 /* 447 * bt_freehead_toalloc: return the freelist for the given size and allocation 448 * strategy. 449 * 450 * for VM_INSTANTFIT, return the list in which any blocks are large enough 451 * for the requested size. otherwise, return the list which can have blocks 452 * large enough for the requested size. 453 */ 454 455 static struct vmem_freelist * 456 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat) 457 { 458 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 459 int idx = SIZE2ORDER(qsize); 460 461 KASSERT(size != 0); 462 KASSERT(qsize != 0); 463 KASSERT((size & vm->vm_quantum_mask) == 0); 464 465 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) { 466 idx++; 467 /* check too large request? 
*/ 468 } 469 KASSERT(idx >= 0); 470 KASSERT(idx < VMEM_MAXORDER); 471 472 return &vm->vm_freelist[idx]; 473 } 474 475 /* ---- boundary tag hash */ 476 477 static struct vmem_hashlist * 478 bt_hashhead(vmem_t *vm, vmem_addr_t addr) 479 { 480 struct vmem_hashlist *list; 481 unsigned int hash; 482 483 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT); 484 list = &vm->vm_hashlist[hash & vm->vm_hashmask]; 485 486 return list; 487 } 488 489 static bt_t * 490 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr) 491 { 492 struct vmem_hashlist *list; 493 bt_t *bt; 494 495 list = bt_hashhead(vm, addr); 496 LIST_FOREACH(bt, list, bt_hashlist) { 497 if (bt->bt_start == addr) { 498 break; 499 } 500 } 501 502 return bt; 503 } 504 505 static void 506 bt_rembusy(vmem_t *vm, bt_t *bt) 507 { 508 509 KASSERT(vm->vm_nbusytag > 0); 510 vm->vm_inuse -= bt->bt_size; 511 vm->vm_nbusytag--; 512 LIST_REMOVE(bt, bt_hashlist); 513 } 514 515 static void 516 bt_insbusy(vmem_t *vm, bt_t *bt) 517 { 518 struct vmem_hashlist *list; 519 520 KASSERT(bt->bt_type == BT_TYPE_BUSY); 521 522 list = bt_hashhead(vm, bt->bt_start); 523 LIST_INSERT_HEAD(list, bt, bt_hashlist); 524 if (++vm->vm_nbusytag > vm->vm_maxbusytag) { 525 vm->vm_maxbusytag = vm->vm_nbusytag; 526 } 527 vm->vm_inuse += bt->bt_size; 528 } 529 530 /* ---- boundary tag list */ 531 532 static void 533 bt_remseg(vmem_t *vm, bt_t *bt) 534 { 535 536 TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist); 537 } 538 539 static void 540 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev) 541 { 542 543 TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist); 544 } 545 546 static void 547 bt_insseg_tail(vmem_t *vm, bt_t *bt) 548 { 549 550 TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist); 551 } 552 553 static void 554 bt_remfree(vmem_t *vm, bt_t *bt) 555 { 556 557 KASSERT(bt->bt_type == BT_TYPE_FREE); 558 559 LIST_REMOVE(bt, bt_freelist); 560 } 561 562 static void 563 bt_insfree(vmem_t *vm, bt_t *bt) 564 { 565 struct vmem_freelist *list; 566 567 list = bt_freehead_tofree(vm, bt->bt_size); 568 LIST_INSERT_HEAD(list, bt, bt_freelist); 569 } 570 571 /* ---- vmem internal functions */ 572 573 #if defined(QCACHE) 574 static inline vm_flag_t 575 prf_to_vmf(int prflags) 576 { 577 vm_flag_t vmflags; 578 579 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0); 580 if ((prflags & PR_WAITOK) != 0) { 581 vmflags = VM_SLEEP; 582 } else { 583 vmflags = VM_NOSLEEP; 584 } 585 return vmflags; 586 } 587 588 static inline int 589 vmf_to_prf(vm_flag_t vmflags) 590 { 591 int prflags; 592 593 if ((vmflags & VM_SLEEP) != 0) { 594 prflags = PR_WAITOK; 595 } else { 596 prflags = PR_NOWAIT; 597 } 598 return prflags; 599 } 600 601 static size_t 602 qc_poolpage_size(size_t qcache_max) 603 { 604 int i; 605 606 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) { 607 /* nothing */ 608 } 609 return ORDER2SIZE(i); 610 } 611 612 static void * 613 qc_poolpage_alloc(struct pool *pool, int prflags) 614 { 615 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 616 vmem_t *vm = qc->qc_vmem; 617 vmem_addr_t addr; 618 619 if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz, 620 prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0) 621 return NULL; 622 return (void *)addr; 623 } 624 625 static void 626 qc_poolpage_free(struct pool *pool, void *addr) 627 { 628 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 629 vmem_t *vm = qc->qc_vmem; 630 631 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz); 632 } 633 634 static void 635 qc_init(vmem_t *vm, size_t qcache_max, int ipl) 636 { 637 qcache_t *prevqc; 638 struct pool_allocator *pa; 639 int 
qcache_idx_max; 640 int i; 641 642 KASSERT((qcache_max & vm->vm_quantum_mask) == 0); 643 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) { 644 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift; 645 } 646 vm->vm_qcache_max = qcache_max; 647 pa = &vm->vm_qcache_allocator; 648 memset(pa, 0, sizeof(*pa)); 649 pa->pa_alloc = qc_poolpage_alloc; 650 pa->pa_free = qc_poolpage_free; 651 pa->pa_pagesz = qc_poolpage_size(qcache_max); 652 653 qcache_idx_max = qcache_max >> vm->vm_quantum_shift; 654 prevqc = NULL; 655 for (i = qcache_idx_max; i > 0; i--) { 656 qcache_t *qc = &vm->vm_qcache_store[i - 1]; 657 size_t size = i << vm->vm_quantum_shift; 658 pool_cache_t pc; 659 660 qc->qc_vmem = vm; 661 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu", 662 vm->vm_name, size); 663 664 pc = pool_cache_init(size, 665 ORDER2SIZE(vm->vm_quantum_shift), 0, 666 PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */, 667 qc->qc_name, pa, ipl, NULL, NULL, NULL); 668 669 KASSERT(pc); 670 671 qc->qc_cache = pc; 672 KASSERT(qc->qc_cache != NULL); /* XXX */ 673 if (prevqc != NULL && 674 qc->qc_cache->pc_pool.pr_itemsperpage == 675 prevqc->qc_cache->pc_pool.pr_itemsperpage) { 676 pool_cache_destroy(qc->qc_cache); 677 vm->vm_qcache[i - 1] = prevqc; 678 continue; 679 } 680 qc->qc_cache->pc_pool.pr_qcache = qc; 681 vm->vm_qcache[i - 1] = qc; 682 prevqc = qc; 683 } 684 } 685 686 static void 687 qc_destroy(vmem_t *vm) 688 { 689 const qcache_t *prevqc; 690 int i; 691 int qcache_idx_max; 692 693 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift; 694 prevqc = NULL; 695 for (i = 0; i < qcache_idx_max; i++) { 696 qcache_t *qc = vm->vm_qcache[i]; 697 698 if (prevqc == qc) { 699 continue; 700 } 701 pool_cache_destroy(qc->qc_cache); 702 prevqc = qc; 703 } 704 } 705 #endif 706 707 #if defined(_KERNEL) 708 static void 709 vmem_bootstrap(void) 710 { 711 712 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_NONE); 713 mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM); 714 mutex_init(&vmem_btag_refill_lock, MUTEX_DEFAULT, IPL_VM); 715 716 while (static_bt_count-- > 0) { 717 bt_t *bt = &static_bts[static_bt_count]; 718 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 719 VMEM_EVCNT_INCR(static_bt_count); 720 vmem_btag_freelist_count++; 721 } 722 vmem_bootstrapped = TRUE; 723 } 724 725 void 726 vmem_subsystem_init(vmem_t *vm) 727 { 728 729 kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va", 730 0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm, 731 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT, 732 IPL_VM); 733 734 kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta", 735 0, 0, PAGE_SIZE, 736 uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena, 737 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); 738 739 pool_init(&vmem_btag_pool, sizeof(bt_t), coherency_unit, 0, 740 PR_PHINPAGE, "vmembt", &pool_allocator_vmem_meta, IPL_VM); 741 vmem_btag_pool_initialized = true; 742 } 743 #endif /* defined(_KERNEL) */ 744 745 static int 746 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags, 747 int spanbttype) 748 { 749 bt_t *btspan; 750 bt_t *btfree; 751 752 VMEM_ASSERT_LOCKED(vm); 753 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 754 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 755 KASSERT(spanbttype == BT_TYPE_SPAN || 756 spanbttype == BT_TYPE_SPAN_STATIC); 757 758 btspan = bt_alloc(vm, flags); 759 if (btspan == NULL) { 760 return SET_ERROR(ENOMEM); 761 } 762 btfree = bt_alloc(vm, flags); 763 if (btfree == NULL) { 764 bt_free(vm, btspan); 765 return SET_ERROR(ENOMEM); 766 } 767 
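	/*
	 * Describe the new region with a span tag and cover its whole range
	 * with a single free tag; the free tag sits on the segment list
	 * immediately after the span tag and goes onto the matching freelist.
	 */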
768 btspan->bt_type = spanbttype; 769 btspan->bt_start = addr; 770 btspan->bt_size = size; 771 772 btfree->bt_type = BT_TYPE_FREE; 773 btfree->bt_start = addr; 774 btfree->bt_size = size; 775 776 bt_insseg_tail(vm, btspan); 777 bt_insseg(vm, btfree, btspan); 778 bt_insfree(vm, btfree); 779 vm->vm_size += size; 780 781 return 0; 782 } 783 784 static void 785 vmem_destroy1(vmem_t *vm) 786 { 787 788 #if defined(QCACHE) 789 qc_destroy(vm); 790 #endif /* defined(QCACHE) */ 791 VMEM_LOCK(vm); 792 793 for (int i = 0; i < vm->vm_hashsize; i++) { 794 bt_t *bt; 795 796 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) { 797 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC); 798 LIST_REMOVE(bt, bt_hashlist); 799 bt_free(vm, bt); 800 } 801 } 802 803 /* bt_freetrim() drops the lock. */ 804 bt_freetrim(vm, 0); 805 if (vm->vm_hashlist != &vm->vm_hash0) { 806 xfree(vm->vm_hashlist, 807 sizeof(struct vmem_hashlist) * vm->vm_hashsize); 808 } 809 810 VMEM_CONDVAR_DESTROY(vm); 811 VMEM_LOCK_DESTROY(vm); 812 xfree(vm, sizeof(*vm)); 813 } 814 815 static int 816 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags) 817 { 818 vmem_addr_t addr; 819 int rc; 820 821 VMEM_ASSERT_LOCKED(vm); 822 823 if (vm->vm_importfn == NULL) { 824 return SET_ERROR(EINVAL); 825 } 826 827 if (vm->vm_flags & VM_LARGEIMPORT) { 828 size *= 16; 829 } 830 831 VMEM_UNLOCK(vm); 832 if (vm->vm_flags & VM_XIMPORT) { 833 rc = __FPTRCAST(vmem_ximport_t *, vm->vm_importfn)(vm->vm_arg, 834 size, &size, flags, &addr); 835 } else { 836 rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr); 837 } 838 VMEM_LOCK(vm); 839 840 if (rc) { 841 return SET_ERROR(ENOMEM); 842 } 843 844 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) { 845 VMEM_UNLOCK(vm); 846 (*vm->vm_releasefn)(vm->vm_arg, addr, size); 847 VMEM_LOCK(vm); 848 return SET_ERROR(ENOMEM); 849 } 850 851 return 0; 852 } 853 854 #if defined(_KERNEL) 855 static int 856 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags) 857 { 858 bt_t *bt; 859 int i; 860 struct vmem_hashlist *newhashlist; 861 struct vmem_hashlist *oldhashlist; 862 size_t oldhashsize; 863 864 KASSERT(newhashsize > 0); 865 866 /* Round hash size up to a power of 2. */ 867 newhashsize = 1 << (ilog2(newhashsize) + 1); 868 869 newhashlist = 870 xmalloc(sizeof(struct vmem_hashlist) * newhashsize, flags); 871 if (newhashlist == NULL) { 872 return SET_ERROR(ENOMEM); 873 } 874 for (i = 0; i < newhashsize; i++) { 875 LIST_INIT(&newhashlist[i]); 876 } 877 878 VMEM_LOCK(vm); 879 /* Decay back to a small hash slowly. */ 880 if (vm->vm_maxbusytag >= 2) { 881 vm->vm_maxbusytag = vm->vm_maxbusytag / 2 - 1; 882 if (vm->vm_nbusytag > vm->vm_maxbusytag) { 883 vm->vm_maxbusytag = vm->vm_nbusytag; 884 } 885 } else { 886 vm->vm_maxbusytag = vm->vm_nbusytag; 887 } 888 oldhashlist = vm->vm_hashlist; 889 oldhashsize = vm->vm_hashsize; 890 vm->vm_hashlist = newhashlist; 891 vm->vm_hashsize = newhashsize; 892 vm->vm_hashmask = newhashsize - 1; 893 if (oldhashlist == NULL) { 894 VMEM_UNLOCK(vm); 895 return 0; 896 } 897 for (i = 0; i < oldhashsize; i++) { 898 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) { 899 bt_rembusy(vm, bt); /* XXX */ 900 bt_insbusy(vm, bt); 901 } 902 } 903 VMEM_UNLOCK(vm); 904 905 if (oldhashlist != &vm->vm_hash0) { 906 xfree(oldhashlist, 907 sizeof(struct vmem_hashlist) * oldhashsize); 908 } 909 910 return 0; 911 } 912 #endif /* _KERNEL */ 913 914 /* 915 * vmem_fit: check if a bt can satisfy the given restrictions. 
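 * "fit" here means: the chosen start address satisfies
 * (start & (align - 1)) == phase, the range [start, start + size - 1]
 * does not cross a nocross-aligned boundary, and it lies within
 * [minaddr, maxaddr] as well as within the tag itself.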
916 * 917 * it's a caller's responsibility to ensure the region is big enough 918 * before calling us. 919 */ 920 921 static int 922 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, 923 vmem_size_t phase, vmem_size_t nocross, 924 vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp) 925 { 926 vmem_addr_t start; 927 vmem_addr_t end; 928 929 KASSERT(size > 0); 930 KASSERT(bt->bt_size >= size); /* caller's responsibility */ 931 932 /* 933 * XXX assumption: vmem_addr_t and vmem_size_t are 934 * unsigned integer of the same size. 935 */ 936 937 start = bt->bt_start; 938 if (start < minaddr) { 939 start = minaddr; 940 } 941 end = BT_END(bt); 942 if (end > maxaddr) { 943 end = maxaddr; 944 } 945 if (start > end) { 946 return SET_ERROR(ENOMEM); 947 } 948 949 start = VMEM_ALIGNUP(start - phase, align) + phase; 950 if (start < bt->bt_start) { 951 start += align; 952 } 953 if (VMEM_CROSS_P(start, start + size - 1, nocross)) { 954 KASSERT(align < nocross); 955 start = VMEM_ALIGNUP(start - phase, nocross) + phase; 956 } 957 if (start <= end && end - start >= size - 1) { 958 KASSERT((start & (align - 1)) == phase); 959 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross)); 960 KASSERT(minaddr <= start); 961 KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr); 962 KASSERT(bt->bt_start <= start); 963 KASSERT(BT_END(bt) - start >= size - 1); 964 *addrp = start; 965 return 0; 966 } 967 return SET_ERROR(ENOMEM); 968 } 969 970 /* ---- vmem API */ 971 972 /* 973 * vmem_init: creates a vmem arena. 974 */ 975 976 vmem_t * 977 vmem_init(vmem_t *vm, const char *name, 978 vmem_addr_t base, vmem_size_t size, vmem_size_t quantum, 979 vmem_import_t *importfn, vmem_release_t *releasefn, 980 vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 981 { 982 int i; 983 984 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 985 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 986 KASSERT(quantum > 0); 987 KASSERT(powerof2(quantum)); 988 989 /* 990 * If private tags are going to be used, they must 991 * be added to the arena before the first span is 992 * added. 993 */ 994 KASSERT((flags & VM_PRIVTAGS) == 0 || size == 0); 995 996 #if defined(_KERNEL) 997 /* XXX: SMP, we get called early... 
*/ 998 if (!vmem_bootstrapped) { 999 vmem_bootstrap(); 1000 } 1001 #endif /* defined(_KERNEL) */ 1002 1003 if (vm == NULL) { 1004 vm = xmalloc(sizeof(*vm), flags); 1005 } 1006 if (vm == NULL) { 1007 return NULL; 1008 } 1009 1010 VMEM_CONDVAR_INIT(vm, "vmem"); 1011 VMEM_LOCK_INIT(vm, ipl); 1012 vm->vm_flags = flags; 1013 vm->vm_nfreetags = 0; 1014 LIST_INIT(&vm->vm_freetags); 1015 strlcpy(vm->vm_name, name, sizeof(vm->vm_name)); 1016 vm->vm_quantum_mask = quantum - 1; 1017 vm->vm_quantum_shift = SIZE2ORDER(quantum); 1018 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum); 1019 vm->vm_importfn = importfn; 1020 vm->vm_releasefn = releasefn; 1021 vm->vm_arg = arg; 1022 vm->vm_nbusytag = 0; 1023 vm->vm_maxbusytag = 0; 1024 vm->vm_size = 0; 1025 vm->vm_inuse = 0; 1026 #if defined(QCACHE) 1027 qc_init(vm, qcache_max, ipl); 1028 #endif /* defined(QCACHE) */ 1029 1030 TAILQ_INIT(&vm->vm_seglist); 1031 for (i = 0; i < VMEM_MAXORDER; i++) { 1032 LIST_INIT(&vm->vm_freelist[i]); 1033 } 1034 memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0)); 1035 vm->vm_hashsize = 1; 1036 vm->vm_hashmask = vm->vm_hashsize - 1; 1037 vm->vm_hashlist = &vm->vm_hash0; 1038 1039 if (size != 0) { 1040 if (vmem_add(vm, base, size, flags) != 0) { 1041 vmem_destroy1(vm); 1042 return NULL; 1043 } 1044 } 1045 1046 #if defined(_KERNEL) 1047 if (flags & VM_BOOTSTRAP) { 1048 bt_refill(vm); 1049 } 1050 1051 mutex_enter(&vmem_list_lock); 1052 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist); 1053 mutex_exit(&vmem_list_lock); 1054 #endif /* defined(_KERNEL) */ 1055 1056 return vm; 1057 } 1058 1059 1060 1061 /* 1062 * vmem_create: create an arena. 1063 * 1064 * => must not be called from interrupt context. 1065 */ 1066 1067 vmem_t * 1068 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, 1069 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, 1070 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 1071 { 1072 1073 KASSERT((flags & (VM_XIMPORT)) == 0); 1074 1075 return vmem_init(NULL, name, base, size, quantum, 1076 importfn, releasefn, source, qcache_max, flags, ipl); 1077 } 1078 1079 /* 1080 * vmem_xcreate: create an arena takes alternative import func. 1081 * 1082 * => must not be called from interrupt context. 1083 */ 1084 1085 vmem_t * 1086 vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size, 1087 vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn, 1088 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 1089 { 1090 1091 KASSERT((flags & (VM_XIMPORT)) == 0); 1092 1093 return vmem_init(NULL, name, base, size, quantum, 1094 __FPTRCAST(vmem_import_t *, importfn), releasefn, source, 1095 qcache_max, flags | VM_XIMPORT, ipl); 1096 } 1097 1098 void 1099 vmem_destroy(vmem_t *vm) 1100 { 1101 1102 #if defined(_KERNEL) 1103 mutex_enter(&vmem_list_lock); 1104 LIST_REMOVE(vm, vm_alllist); 1105 mutex_exit(&vmem_list_lock); 1106 #endif /* defined(_KERNEL) */ 1107 1108 vmem_destroy1(vm); 1109 } 1110 1111 vmem_size_t 1112 vmem_roundup_size(vmem_t *vm, vmem_size_t size) 1113 { 1114 1115 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask; 1116 } 1117 1118 /* 1119 * vmem_alloc: allocate resource from the arena. 
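 *
 * => the size is rounded up to the arena's quantum.
 * => when the arena has a quantum cache, requests no larger than
 *    qcache_max are served from the per-size pool caches; larger
 *    requests fall through to vmem_xalloc().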
1120 */ 1121 1122 int 1123 vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp) 1124 { 1125 const vm_flag_t strat __diagused = flags & VM_FITMASK; 1126 int error; 1127 1128 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1129 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1130 1131 KASSERT(size > 0); 1132 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1133 if ((flags & VM_SLEEP) != 0) { 1134 ASSERT_SLEEPABLE(); 1135 } 1136 1137 #if defined(QCACHE) 1138 if (size <= vm->vm_qcache_max) { 1139 void *p; 1140 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1141 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1142 1143 p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags)); 1144 if (addrp != NULL) 1145 *addrp = (vmem_addr_t)p; 1146 error = (p == NULL) ? SET_ERROR(ENOMEM) : 0; 1147 goto out; 1148 } 1149 #endif /* defined(QCACHE) */ 1150 1151 error = vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, 1152 flags, addrp); 1153 #if defined(QCACHE) 1154 out: 1155 #endif /* defined(QCACHE) */ 1156 KASSERTMSG(error || addrp == NULL || 1157 (*addrp & vm->vm_quantum_mask) == 0, 1158 "vmem %s mask=0x%jx addr=0x%jx", 1159 vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)*addrp); 1160 KASSERT(error == 0 || (flags & VM_SLEEP) == 0); 1161 return error; 1162 } 1163 1164 int 1165 vmem_xalloc_addr(vmem_t *vm, const vmem_addr_t addr, const vmem_size_t size, 1166 vm_flag_t flags) 1167 { 1168 vmem_addr_t result; 1169 int error; 1170 1171 KASSERT((addr & vm->vm_quantum_mask) == 0); 1172 KASSERT(size != 0); 1173 1174 flags = (flags & ~VM_INSTANTFIT) | VM_BESTFIT; 1175 1176 error = vmem_xalloc(vm, size, 0, 0, 0, addr, addr + size - 1, 1177 flags, &result); 1178 1179 KASSERT(error || result == addr); 1180 KASSERT(error == 0 || (flags & VM_SLEEP) == 0); 1181 return error; 1182 } 1183 1184 int 1185 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align, 1186 const vmem_size_t phase, const vmem_size_t nocross, 1187 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags, 1188 vmem_addr_t *addrp) 1189 { 1190 struct vmem_freelist *list; 1191 struct vmem_freelist *first; 1192 struct vmem_freelist *end; 1193 bt_t *bt; 1194 bt_t *btnew; 1195 bt_t *btnew2; 1196 const vmem_size_t size = vmem_roundup_size(vm, size0); 1197 vm_flag_t strat = flags & VM_FITMASK; 1198 vmem_addr_t start; 1199 int rc; 1200 1201 KASSERT(size0 > 0); 1202 KASSERT(size > 0); 1203 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1204 if ((flags & VM_SLEEP) != 0) { 1205 ASSERT_SLEEPABLE(); 1206 } 1207 KASSERT((align & vm->vm_quantum_mask) == 0); 1208 KASSERT((align & (align - 1)) == 0); 1209 KASSERT((phase & vm->vm_quantum_mask) == 0); 1210 KASSERT((nocross & vm->vm_quantum_mask) == 0); 1211 KASSERT((nocross & (nocross - 1)) == 0); 1212 KASSERT(align == 0 || phase < align); 1213 KASSERT(phase == 0 || phase < align); 1214 KASSERT(nocross == 0 || nocross >= size); 1215 KASSERT(minaddr <= maxaddr); 1216 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross)); 1217 1218 if (align == 0) { 1219 align = vm->vm_quantum_mask + 1; 1220 } 1221 1222 /* 1223 * allocate boundary tags before acquiring the vmem lock. 
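 * (bt_alloc() requires the arena lock, so the tags are in fact taken
 * right after VMEM_LOCK() below, before any free list is examined.)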
1224 */ 1225 VMEM_LOCK(vm); 1226 btnew = bt_alloc(vm, flags); 1227 if (btnew == NULL) { 1228 VMEM_UNLOCK(vm); 1229 return SET_ERROR(ENOMEM); 1230 } 1231 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */ 1232 if (btnew2 == NULL) { 1233 bt_free(vm, btnew); 1234 VMEM_UNLOCK(vm); 1235 return SET_ERROR(ENOMEM); 1236 } 1237 1238 /* 1239 * choose a free block from which we allocate. 1240 */ 1241 retry_strat: 1242 first = bt_freehead_toalloc(vm, size, strat); 1243 end = &vm->vm_freelist[VMEM_MAXORDER]; 1244 retry: 1245 bt = NULL; 1246 vmem_check(vm); 1247 if (strat == VM_INSTANTFIT) { 1248 /* 1249 * just choose the first block which satisfies our restrictions. 1250 * 1251 * note that we don't need to check the size of the blocks 1252 * because any blocks found on these list should be larger than 1253 * the given size. 1254 */ 1255 for (list = first; list < end; list++) { 1256 bt = LIST_FIRST(list); 1257 if (bt != NULL) { 1258 rc = vmem_fit(bt, size, align, phase, 1259 nocross, minaddr, maxaddr, &start); 1260 if (rc == 0) { 1261 goto gotit; 1262 } 1263 /* 1264 * don't bother to follow the bt_freelist link 1265 * here. the list can be very long and we are 1266 * told to run fast. blocks from the later free 1267 * lists are larger and have better chances to 1268 * satisfy our restrictions. 1269 */ 1270 } 1271 } 1272 } else { /* VM_BESTFIT */ 1273 /* 1274 * we assume that, for space efficiency, it's better to 1275 * allocate from a smaller block. thus we will start searching 1276 * from the lower-order list than VM_INSTANTFIT. 1277 * however, don't bother to find the smallest block in a free 1278 * list because the list can be very long. we can revisit it 1279 * if/when it turns out to be a problem. 1280 * 1281 * note that the 'first' list can contain blocks smaller than 1282 * the requested size. thus we need to check bt_size. 1283 */ 1284 for (list = first; list < end; list++) { 1285 LIST_FOREACH(bt, list, bt_freelist) { 1286 if (bt->bt_size >= size) { 1287 rc = vmem_fit(bt, size, align, phase, 1288 nocross, minaddr, maxaddr, &start); 1289 if (rc == 0) { 1290 goto gotit; 1291 } 1292 } 1293 } 1294 } 1295 } 1296 #if 1 1297 if (strat == VM_INSTANTFIT) { 1298 strat = VM_BESTFIT; 1299 goto retry_strat; 1300 } 1301 #endif 1302 if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) { 1303 1304 /* 1305 * XXX should try to import a region large enough to 1306 * satisfy restrictions? 
1307 */ 1308 1309 goto fail; 1310 } 1311 /* XXX eeek, minaddr & maxaddr not respected */ 1312 if (vmem_import(vm, size, flags) == 0) { 1313 goto retry; 1314 } 1315 /* XXX */ 1316 1317 if ((flags & VM_SLEEP) != 0) { 1318 vmem_kick_pdaemon(); 1319 VMEM_CONDVAR_WAIT(vm); 1320 goto retry; 1321 } 1322 fail: 1323 bt_free(vm, btnew); 1324 bt_free(vm, btnew2); 1325 VMEM_UNLOCK(vm); 1326 return SET_ERROR(ENOMEM); 1327 1328 gotit: 1329 KASSERT(bt->bt_type == BT_TYPE_FREE); 1330 KASSERT(bt->bt_size >= size); 1331 bt_remfree(vm, bt); 1332 vmem_check(vm); 1333 if (bt->bt_start != start) { 1334 btnew2->bt_type = BT_TYPE_FREE; 1335 btnew2->bt_start = bt->bt_start; 1336 btnew2->bt_size = start - bt->bt_start; 1337 bt->bt_start = start; 1338 bt->bt_size -= btnew2->bt_size; 1339 bt_insfree(vm, btnew2); 1340 bt_insseg(vm, btnew2, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); 1341 btnew2 = NULL; 1342 vmem_check(vm); 1343 } 1344 KASSERT(bt->bt_start == start); 1345 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) { 1346 /* split */ 1347 btnew->bt_type = BT_TYPE_BUSY; 1348 btnew->bt_start = bt->bt_start; 1349 btnew->bt_size = size; 1350 bt->bt_start = bt->bt_start + size; 1351 bt->bt_size -= size; 1352 bt_insfree(vm, bt); 1353 bt_insseg(vm, btnew, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); 1354 bt_insbusy(vm, btnew); 1355 vmem_check(vm); 1356 } else { 1357 bt->bt_type = BT_TYPE_BUSY; 1358 bt_insbusy(vm, bt); 1359 vmem_check(vm); 1360 bt_free(vm, btnew); 1361 btnew = bt; 1362 } 1363 if (btnew2 != NULL) { 1364 bt_free(vm, btnew2); 1365 } 1366 KASSERT(btnew->bt_size >= size); 1367 btnew->bt_type = BT_TYPE_BUSY; 1368 if (addrp != NULL) 1369 *addrp = btnew->bt_start; 1370 VMEM_UNLOCK(vm); 1371 KASSERTMSG(addrp == NULL || 1372 (*addrp & vm->vm_quantum_mask) == 0, 1373 "vmem %s mask=0x%jx addr=0x%jx", 1374 vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)*addrp); 1375 return 0; 1376 } 1377 1378 /* 1379 * vmem_free: free the resource to the arena. 1380 */ 1381 1382 void 1383 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1384 { 1385 1386 KASSERT(size > 0); 1387 KASSERTMSG((addr & vm->vm_quantum_mask) == 0, 1388 "vmem %s mask=0x%jx addr=0x%jx", 1389 vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)addr); 1390 1391 #if defined(QCACHE) 1392 if (size <= vm->vm_qcache_max) { 1393 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1394 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1395 1396 pool_cache_put(qc->qc_cache, (void *)addr); 1397 return; 1398 } 1399 #endif /* defined(QCACHE) */ 1400 1401 vmem_xfree(vm, addr, size); 1402 } 1403 1404 void 1405 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1406 { 1407 bt_t *bt; 1408 1409 KASSERT(size > 0); 1410 KASSERTMSG((addr & vm->vm_quantum_mask) == 0, 1411 "vmem %s mask=0x%jx addr=0x%jx", 1412 vm->vm_name, (uintmax_t)vm->vm_quantum_mask, (uintmax_t)addr); 1413 1414 VMEM_LOCK(vm); 1415 1416 bt = bt_lookupbusy(vm, addr); 1417 KASSERTMSG(bt != NULL, "vmem %s addr 0x%jx size 0x%jx", 1418 vm->vm_name, (uintmax_t)addr, (uintmax_t)size); 1419 KASSERT(bt->bt_start == addr); 1420 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) || 1421 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask); 1422 1423 /* vmem_xfree_bt() drops the lock. */ 1424 vmem_xfree_bt(vm, bt); 1425 } 1426 1427 void 1428 vmem_xfreeall(vmem_t *vm) 1429 { 1430 bt_t *bt; 1431 1432 #if defined(QCACHE) 1433 /* This can't be used if the arena has a quantum cache. 
*/ 1434 KASSERT(vm->vm_qcache_max == 0); 1435 #endif /* defined(QCACHE) */ 1436 1437 for (;;) { 1438 VMEM_LOCK(vm); 1439 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1440 if (bt->bt_type == BT_TYPE_BUSY) 1441 break; 1442 } 1443 if (bt != NULL) { 1444 /* vmem_xfree_bt() drops the lock. */ 1445 vmem_xfree_bt(vm, bt); 1446 } else { 1447 VMEM_UNLOCK(vm); 1448 return; 1449 } 1450 } 1451 } 1452 1453 static void 1454 vmem_xfree_bt(vmem_t *vm, bt_t *bt) 1455 { 1456 bt_t *t; 1457 1458 VMEM_ASSERT_LOCKED(vm); 1459 1460 KASSERT(bt->bt_type == BT_TYPE_BUSY); 1461 bt_rembusy(vm, bt); 1462 bt->bt_type = BT_TYPE_FREE; 1463 1464 /* coalesce */ 1465 t = TAILQ_NEXT(bt, bt_seglist); 1466 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1467 KASSERT(BT_END(bt) < t->bt_start); /* YYY */ 1468 bt_remfree(vm, t); 1469 bt_remseg(vm, t); 1470 bt->bt_size += t->bt_size; 1471 bt_free(vm, t); 1472 } 1473 t = TAILQ_PREV(bt, vmem_seglist, bt_seglist); 1474 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1475 KASSERT(BT_END(t) < bt->bt_start); /* YYY */ 1476 bt_remfree(vm, t); 1477 bt_remseg(vm, t); 1478 bt->bt_size += t->bt_size; 1479 bt->bt_start = t->bt_start; 1480 bt_free(vm, t); 1481 } 1482 1483 t = TAILQ_PREV(bt, vmem_seglist, bt_seglist); 1484 KASSERT(t != NULL); 1485 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY); 1486 if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN && 1487 t->bt_size == bt->bt_size) { 1488 vmem_addr_t spanaddr; 1489 vmem_size_t spansize; 1490 1491 KASSERT(t->bt_start == bt->bt_start); 1492 spanaddr = bt->bt_start; 1493 spansize = bt->bt_size; 1494 bt_remseg(vm, bt); 1495 bt_free(vm, bt); 1496 bt_remseg(vm, t); 1497 bt_free(vm, t); 1498 vm->vm_size -= spansize; 1499 VMEM_CONDVAR_BROADCAST(vm); 1500 /* bt_freetrim() drops the lock. */ 1501 bt_freetrim(vm, BT_MAXFREE); 1502 (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize); 1503 } else { 1504 bt_insfree(vm, bt); 1505 VMEM_CONDVAR_BROADCAST(vm); 1506 /* bt_freetrim() drops the lock. */ 1507 bt_freetrim(vm, BT_MAXFREE); 1508 } 1509 } 1510 1511 /* 1512 * vmem_add: 1513 * 1514 * => caller must ensure appropriate spl, 1515 * if the arena can be accessed from interrupt context. 
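 * => the region is added as a static span (BT_TYPE_SPAN_STATIC); unlike
 *    imported spans it is never handed back through vm_releasefn.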
1516 */ 1517 1518 int 1519 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags) 1520 { 1521 int rv; 1522 1523 VMEM_LOCK(vm); 1524 rv = vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC); 1525 VMEM_UNLOCK(vm); 1526 1527 return rv; 1528 } 1529 1530 /* 1531 * vmem_size: information about arenas size 1532 * 1533 * => return free/allocated size in arena 1534 */ 1535 vmem_size_t 1536 vmem_size(vmem_t *vm, int typemask) 1537 { 1538 1539 switch (typemask) { 1540 case VMEM_ALLOC: 1541 return vm->vm_inuse; 1542 case VMEM_FREE: 1543 return vm->vm_size - vm->vm_inuse; 1544 case VMEM_FREE|VMEM_ALLOC: 1545 return vm->vm_size; 1546 default: 1547 panic("vmem_size"); 1548 } 1549 } 1550 1551 /* ---- rehash */ 1552 1553 #if defined(_KERNEL) 1554 static struct callout vmem_rehash_ch; 1555 static int vmem_rehash_interval; 1556 static struct workqueue *vmem_rehash_wq; 1557 static struct work vmem_rehash_wk; 1558 1559 static void 1560 vmem_rehash_all(struct work *wk, void *dummy) 1561 { 1562 vmem_t *vm; 1563 1564 KASSERT(wk == &vmem_rehash_wk); 1565 mutex_enter(&vmem_list_lock); 1566 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1567 size_t desired; 1568 size_t current; 1569 1570 desired = atomic_load_relaxed(&vm->vm_maxbusytag); 1571 current = atomic_load_relaxed(&vm->vm_hashsize); 1572 1573 if (desired > VMEM_HASHSIZE_MAX) { 1574 desired = VMEM_HASHSIZE_MAX; 1575 } else if (desired < VMEM_HASHSIZE_MIN) { 1576 desired = VMEM_HASHSIZE_MIN; 1577 } 1578 if (desired > current * 2 || desired * 2 < current) { 1579 vmem_rehash(vm, desired, VM_NOSLEEP); 1580 } 1581 } 1582 mutex_exit(&vmem_list_lock); 1583 1584 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1585 } 1586 1587 static void 1588 vmem_rehash_all_kick(void *dummy) 1589 { 1590 1591 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL); 1592 } 1593 1594 void 1595 vmem_rehash_start(void) 1596 { 1597 int error; 1598 1599 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash", 1600 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE); 1601 if (error) { 1602 panic("%s: workqueue_create %d\n", __func__, error); 1603 } 1604 callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE); 1605 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL); 1606 1607 vmem_rehash_interval = hz * 10; 1608 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1609 } 1610 #endif /* defined(_KERNEL) */ 1611 1612 /* ---- debug */ 1613 1614 #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) 1615 1616 static void bt_dump(const bt_t *, void (*)(const char *, ...) 1617 __printflike(1, 2)); 1618 1619 static const char * 1620 bt_type_string(int type) 1621 { 1622 static const char * const table[] = { 1623 [BT_TYPE_BUSY] = "busy", 1624 [BT_TYPE_FREE] = "free", 1625 [BT_TYPE_SPAN] = "span", 1626 [BT_TYPE_SPAN_STATIC] = "static span", 1627 }; 1628 1629 if (type >= __arraycount(table)) { 1630 return "BOGUS"; 1631 } 1632 return table[type]; 1633 } 1634 1635 static void 1636 bt_dump(const bt_t *bt, void (*pr)(const char *, ...)) 1637 { 1638 1639 (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n", 1640 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size, 1641 bt->bt_type, bt_type_string(bt->bt_type)); 1642 } 1643 1644 static void 1645 vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...) 
__printflike(1, 2)) 1646 { 1647 const bt_t *bt; 1648 int i; 1649 1650 (*pr)("vmem %p '%s'\n", vm, vm->vm_name); 1651 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1652 bt_dump(bt, pr); 1653 } 1654 1655 for (i = 0; i < VMEM_MAXORDER; i++) { 1656 const struct vmem_freelist *fl = &vm->vm_freelist[i]; 1657 1658 if (LIST_EMPTY(fl)) { 1659 continue; 1660 } 1661 1662 (*pr)("freelist[%d]\n", i); 1663 LIST_FOREACH(bt, fl, bt_freelist) { 1664 bt_dump(bt, pr); 1665 } 1666 } 1667 } 1668 1669 #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */ 1670 1671 #if defined(DDB) 1672 static bt_t * 1673 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr) 1674 { 1675 bt_t *bt; 1676 1677 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1678 if (BT_ISSPAN_P(bt)) { 1679 continue; 1680 } 1681 if (bt->bt_start <= addr && addr <= BT_END(bt)) { 1682 return bt; 1683 } 1684 } 1685 1686 return NULL; 1687 } 1688 1689 void 1690 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 1691 { 1692 vmem_t *vm; 1693 1694 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1695 bt_t *bt; 1696 1697 bt = vmem_whatis_lookup(vm, addr); 1698 if (bt == NULL) { 1699 continue; 1700 } 1701 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n", 1702 (void *)addr, (void *)bt->bt_start, 1703 (size_t)(addr - bt->bt_start), vm->vm_name, 1704 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free"); 1705 } 1706 } 1707 1708 void 1709 vmem_printall(const char *modif, void (*pr)(const char *, ...)) 1710 { 1711 const vmem_t *vm; 1712 1713 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1714 vmem_dump(vm, pr); 1715 } 1716 } 1717 1718 void 1719 vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...)) 1720 { 1721 const vmem_t *vm = (const void *)addr; 1722 1723 vmem_dump(vm, pr); 1724 } 1725 #endif /* defined(DDB) */ 1726 1727 #if defined(_KERNEL) 1728 #define vmem_printf printf 1729 #else 1730 #include <stdio.h> 1731 #include <stdarg.h> 1732 1733 static void 1734 vmem_printf(const char *fmt, ...) 
1735 { 1736 va_list ap; 1737 va_start(ap, fmt); 1738 vprintf(fmt, ap); 1739 va_end(ap); 1740 } 1741 #endif 1742 1743 #if defined(VMEM_SANITY) 1744 1745 static bool 1746 vmem_check_sanity(vmem_t *vm) 1747 { 1748 const bt_t *bt, *bt2; 1749 1750 KASSERT(vm != NULL); 1751 1752 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1753 if (bt->bt_start > BT_END(bt)) { 1754 printf("corrupted tag\n"); 1755 bt_dump(bt, vmem_printf); 1756 return false; 1757 } 1758 } 1759 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1760 TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1761 if (bt == bt2) { 1762 continue; 1763 } 1764 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1765 continue; 1766 } 1767 if (bt->bt_start <= BT_END(bt2) && 1768 bt2->bt_start <= BT_END(bt)) { 1769 printf("overwrapped tags\n"); 1770 bt_dump(bt, vmem_printf); 1771 bt_dump(bt2, vmem_printf); 1772 return false; 1773 } 1774 } 1775 } 1776 1777 return true; 1778 } 1779 1780 static void 1781 vmem_check(vmem_t *vm) 1782 { 1783 1784 if (!vmem_check_sanity(vm)) { 1785 panic("insanity vmem %p", vm); 1786 } 1787 } 1788 1789 #endif /* defined(VMEM_SANITY) */ 1790 1791 #if defined(UNITTEST) 1792 int 1793 main(void) 1794 { 1795 int rc; 1796 vmem_t *vm; 1797 vmem_addr_t p; 1798 struct reg { 1799 vmem_addr_t p; 1800 vmem_size_t sz; 1801 bool x; 1802 } *reg = NULL; 1803 int nreg = 0; 1804 int nalloc = 0; 1805 int nfree = 0; 1806 vmem_size_t total = 0; 1807 #if 1 1808 vm_flag_t strat = VM_INSTANTFIT; 1809 #else 1810 vm_flag_t strat = VM_BESTFIT; 1811 #endif 1812 1813 vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP, 1814 #ifdef _KERNEL 1815 IPL_NONE 1816 #else 1817 0 1818 #endif 1819 ); 1820 if (vm == NULL) { 1821 printf("vmem_create\n"); 1822 exit(EXIT_FAILURE); 1823 } 1824 vmem_dump(vm, vmem_printf); 1825 1826 rc = vmem_add(vm, 0, 50, VM_SLEEP); 1827 assert(rc == 0); 1828 rc = vmem_add(vm, 100, 200, VM_SLEEP); 1829 assert(rc == 0); 1830 rc = vmem_add(vm, 2000, 1, VM_SLEEP); 1831 assert(rc == 0); 1832 rc = vmem_add(vm, 40000, 65536, VM_SLEEP); 1833 assert(rc == 0); 1834 rc = vmem_add(vm, 10000, 10000, VM_SLEEP); 1835 assert(rc == 0); 1836 rc = vmem_add(vm, 500, 1000, VM_SLEEP); 1837 assert(rc == 0); 1838 rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP); 1839 assert(rc == 0); 1840 rc = vmem_xalloc(vm, 0x101, 0, 0, 0, 1841 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1842 assert(rc != 0); 1843 rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p); 1844 assert(rc == 0 && p == 0); 1845 vmem_xfree(vm, p, 50); 1846 rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p); 1847 assert(rc == 0 && p == 0); 1848 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1849 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p); 1850 assert(rc != 0); 1851 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1852 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p); 1853 assert(rc != 0); 1854 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1855 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1856 assert(rc == 0); 1857 vmem_dump(vm, vmem_printf); 1858 for (;;) { 1859 struct reg *r; 1860 int t = rand() % 100; 1861 1862 if (t > 45) { 1863 /* alloc */ 1864 vmem_size_t sz = rand() % 500 + 1; 1865 bool x; 1866 vmem_size_t align, phase, nocross; 1867 vmem_addr_t minaddr, maxaddr; 1868 1869 if (t > 70) { 1870 x = true; 1871 /* XXX */ 1872 align = 1 << (rand() % 15); 1873 phase = rand() % 65536; 1874 nocross = 1 << (rand() % 15); 1875 if (align <= phase) { 1876 phase = 0; 1877 } 1878 if (VMEM_CROSS_P(phase, phase + sz - 1, 1879 nocross)) { 1880 nocross = 0; 1881 } 1882 do { 1883 minaddr = rand() % 50000; 1884 maxaddr = 
rand() % 70000; 1885 } while (minaddr > maxaddr); 1886 printf("=== xalloc %" PRIu64 1887 " align=%" PRIu64 ", phase=%" PRIu64 1888 ", nocross=%" PRIu64 ", min=%" PRIu64 1889 ", max=%" PRIu64 "\n", 1890 (uint64_t)sz, 1891 (uint64_t)align, 1892 (uint64_t)phase, 1893 (uint64_t)nocross, 1894 (uint64_t)minaddr, 1895 (uint64_t)maxaddr); 1896 rc = vmem_xalloc(vm, sz, align, phase, nocross, 1897 minaddr, maxaddr, strat|VM_SLEEP, &p); 1898 } else { 1899 x = false; 1900 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz); 1901 rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p); 1902 } 1903 printf("-> %" PRIu64 "\n", (uint64_t)p); 1904 vmem_dump(vm, vmem_printf); 1905 if (rc != 0) { 1906 if (x) { 1907 continue; 1908 } 1909 break; 1910 } 1911 nreg++; 1912 reg = realloc(reg, sizeof(*reg) * nreg); 1913 r = ®[nreg - 1]; 1914 r->p = p; 1915 r->sz = sz; 1916 r->x = x; 1917 total += sz; 1918 nalloc++; 1919 } else if (nreg != 0) { 1920 /* free */ 1921 r = ®[rand() % nreg]; 1922 printf("=== free %" PRIu64 ", %" PRIu64 "\n", 1923 (uint64_t)r->p, (uint64_t)r->sz); 1924 if (r->x) { 1925 vmem_xfree(vm, r->p, r->sz); 1926 } else { 1927 vmem_free(vm, r->p, r->sz); 1928 } 1929 total -= r->sz; 1930 vmem_dump(vm, vmem_printf); 1931 *r = reg[nreg - 1]; 1932 nreg--; 1933 nfree++; 1934 } 1935 printf("total=%" PRIu64 "\n", (uint64_t)total); 1936 } 1937 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n", 1938 (uint64_t)total, nalloc, nfree); 1939 exit(EXIT_SUCCESS); 1940 } 1941 #endif /* defined(UNITTEST) */ 1942
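/*
 * Illustrative usage sketch (not compiled in): how a caller might manage an
 * arbitrary numeric resource with this allocator, along the lines of the
 * Bonwick paper referenced at the top of the file.  The arena name, the
 * "DMA tag" resource and the address range below are made up for the
 * example only.
 */
#if 0
static void
vmem_usage_example(void)
{
	vmem_t *arena;
	vmem_addr_t tag;

	/*
	 * Manage the hypothetical tag range [0x100, 0x4ff].  A quantum of 1
	 * makes every integer in the range individually allocatable; there
	 * is no backing arena, so no import/release functions are given.
	 */
	arena = vmem_create("example-dma-tags", 0x100, 0x400, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
	if (arena == NULL)
		return;

	/* Plain allocation: any 4 consecutive tags, instant fit. */
	if (vmem_alloc(arena, 4, VM_INSTANTFIT | VM_SLEEP, &tag) == 0)
		vmem_free(arena, tag, 4);

	/* Constrained allocation: 8 tags, 8-aligned, entirely below 0x200. */
	if (vmem_xalloc(arena, 8, 8, 0, 0, VMEM_ADDR_MIN, 0x1ff,
	    VM_BESTFIT | VM_SLEEP, &tag) == 0)
		vmem_xfree(arena, tag, 8);

	vmem_destroy(arena);
}
#endif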