1 /* $NetBSD: subr_vmem.c,v 1.100 2019/12/21 14:50:34 ad Exp $ */ 2 3 /*- 4 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * reference: 31 * - Magazines and Vmem: Extending the Slab Allocator 32 * to Many CPUs and Arbitrary Resources 33 * http://www.usenix.org/event/usenix01/bonwick.html 34 * 35 * locking & the boundary tag pool: 36 * - A pool(9) is used for vmem boundary tags 37 * - During a pool get call the global vmem_btag_refill_lock is taken, 38 * to serialize access to the allocation reserve, but no other 39 * vmem arena locks. 40 * - During pool_put calls no vmem mutexes are locked. 41 * - pool_drain doesn't hold the pool's mutex while releasing memory to 42 * its backing, therefore there is no interference with any vmem mutexes. 43 * - The boundary tag pool is forced to put page headers into pool pages 44 * (PR_PHINPAGE) and not off page to avoid pool recursion.
45 * (due to sizeof(bt_t) it should be the case anyway) 46 */ 47 48 #include <sys/cdefs.h> 49 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.100 2019/12/21 14:50:34 ad Exp $"); 50 51 #if defined(_KERNEL) && defined(_KERNEL_OPT) 52 #include "opt_ddb.h" 53 #endif /* defined(_KERNEL) && defined(_KERNEL_OPT) */ 54 55 #include <sys/param.h> 56 #include <sys/hash.h> 57 #include <sys/queue.h> 58 #include <sys/bitops.h> 59 60 #if defined(_KERNEL) 61 #include <sys/systm.h> 62 #include <sys/kernel.h> /* hz */ 63 #include <sys/callout.h> 64 #include <sys/kmem.h> 65 #include <sys/pool.h> 66 #include <sys/vmem.h> 67 #include <sys/vmem_impl.h> 68 #include <sys/workqueue.h> 69 #include <sys/atomic.h> 70 #include <uvm/uvm.h> 71 #include <uvm/uvm_extern.h> 72 #include <uvm/uvm_km.h> 73 #include <uvm/uvm_page.h> 74 #include <uvm/uvm_pdaemon.h> 75 #else /* defined(_KERNEL) */ 76 #include <stdio.h> 77 #include <errno.h> 78 #include <assert.h> 79 #include <stdlib.h> 80 #include <string.h> 81 #include "../sys/vmem.h" 82 #include "../sys/vmem_impl.h" 83 #endif /* defined(_KERNEL) */ 84 85 86 #if defined(_KERNEL) 87 #include <sys/evcnt.h> 88 #define VMEM_EVCNT_DEFINE(name) \ 89 struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 90 "vmem", #name); \ 91 EVCNT_ATTACH_STATIC(vmem_evcnt_##name); 92 #define VMEM_EVCNT_INCR(ev) vmem_evcnt_##ev.ev_count++ 93 #define VMEM_EVCNT_DECR(ev) vmem_evcnt_##ev.ev_count-- 94 95 VMEM_EVCNT_DEFINE(static_bt_count) 96 VMEM_EVCNT_DEFINE(static_bt_inuse) 97 98 #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan) 99 #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv) 100 #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock) 101 #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv) 102 103 #else /* defined(_KERNEL) */ 104 105 #define VMEM_EVCNT_INCR(ev) /* nothing */ 106 #define VMEM_EVCNT_DECR(ev) /* nothing */ 107 108 #define VMEM_CONDVAR_INIT(vm, wchan) /* nothing */ 109 #define VMEM_CONDVAR_DESTROY(vm) /* nothing */ 110 #define VMEM_CONDVAR_WAIT(vm) /* nothing */ 111 #define VMEM_CONDVAR_BROADCAST(vm) /* nothing */ 112 113 #define UNITTEST 114 #define KASSERT(a) assert(a) 115 #define mutex_init(a, b, c) /* nothing */ 116 #define mutex_destroy(a) /* nothing */ 117 #define mutex_enter(a) /* nothing */ 118 #define mutex_tryenter(a) true 119 #define mutex_exit(a) /* nothing */ 120 #define mutex_owned(a) /* nothing */ 121 #define ASSERT_SLEEPABLE() /* nothing */ 122 #define panic(...) 
printf(__VA_ARGS__); abort() 123 #endif /* defined(_KERNEL) */ 124 125 #if defined(VMEM_SANITY) 126 static void vmem_check(vmem_t *); 127 #else /* defined(VMEM_SANITY) */ 128 #define vmem_check(vm) /* nothing */ 129 #endif /* defined(VMEM_SANITY) */ 130 131 #define VMEM_HASHSIZE_MIN 1 /* XXX */ 132 #define VMEM_HASHSIZE_MAX 65536 /* XXX */ 133 #define VMEM_HASHSIZE_INIT 1 134 135 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT) 136 137 #if defined(_KERNEL) 138 static bool vmem_bootstrapped = false; 139 static kmutex_t vmem_list_lock; 140 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); 141 #endif /* defined(_KERNEL) */ 142 143 /* ---- misc */ 144 145 #define VMEM_LOCK(vm) mutex_enter(&vm->vm_lock) 146 #define VMEM_TRYLOCK(vm) mutex_tryenter(&vm->vm_lock) 147 #define VMEM_UNLOCK(vm) mutex_exit(&vm->vm_lock) 148 #define VMEM_LOCK_INIT(vm, ipl) mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl) 149 #define VMEM_LOCK_DESTROY(vm) mutex_destroy(&vm->vm_lock) 150 #define VMEM_ASSERT_LOCKED(vm) KASSERT(mutex_owned(&vm->vm_lock)) 151 152 #define VMEM_ALIGNUP(addr, align) \ 153 (-(-(addr) & -(align))) 154 155 #define VMEM_CROSS_P(addr1, addr2, boundary) \ 156 ((((addr1) ^ (addr2)) & -(boundary)) != 0) 157 158 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order)) 159 #define SIZE2ORDER(size) ((int)ilog2(size)) 160 161 #if !defined(_KERNEL) 162 #define xmalloc(sz, flags) malloc(sz) 163 #define xfree(p, sz) free(p) 164 #define bt_alloc(vm, flags) malloc(sizeof(bt_t)) 165 #define bt_free(vm, bt) free(bt) 166 #else /* defined(_KERNEL) */ 167 168 #define xmalloc(sz, flags) \ 169 kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP); 170 #define xfree(p, sz) kmem_free(p, sz); 171 172 /* 173 * BT_RESERVE calculation: 174 * we allocate memory for boundary tags with vmem; therefore we have 175 * to keep a reserve of bts used to allocate memory for bts. 176 * This reserve is 4 for each arena involved in allocating vmem's memory. 177 * BT_MAXFREE: don't cache excessive counts of bts in arenas 178 */ 179 #define STATIC_BT_COUNT 200 180 #define BT_MINRESERVE 4 181 #define BT_MAXFREE 64 182 183 static struct vmem_btag static_bts[STATIC_BT_COUNT]; 184 static int static_bt_count = STATIC_BT_COUNT; 185 186 static struct vmem kmem_va_meta_arena_store; 187 vmem_t *kmem_va_meta_arena; 188 static struct vmem kmem_meta_arena_store; 189 vmem_t *kmem_meta_arena = NULL; 190 191 static kmutex_t vmem_btag_refill_lock; 192 static kmutex_t vmem_btag_lock; 193 static LIST_HEAD(, vmem_btag) vmem_btag_freelist; 194 static size_t vmem_btag_freelist_count = 0; 195 static struct pool vmem_btag_pool; 196 197 static void 198 vmem_kick_pdaemon(void) 199 { 200 #if defined(_KERNEL) 201 uvm_kick_pdaemon(); 202 #endif 203 } 204 205 /* ---- boundary tag */ 206 207 static int bt_refill(vmem_t *vm); 208 209 static void * 210 pool_page_alloc_vmem_meta(struct pool *pp, int flags) 211 { 212 const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP; 213 vmem_addr_t va; 214 int ret; 215 216 ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz, 217 (vflags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va); 218 219 return ret ?
NULL : (void *)va; 220 } 221 222 static void 223 pool_page_free_vmem_meta(struct pool *pp, void *v) 224 { 225 226 vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz); 227 } 228 229 /* allocator for vmem-pool metadata */ 230 struct pool_allocator pool_allocator_vmem_meta = { 231 .pa_alloc = pool_page_alloc_vmem_meta, 232 .pa_free = pool_page_free_vmem_meta, 233 .pa_pagesz = 0 234 }; 235 236 static int 237 bt_refill(vmem_t *vm) 238 { 239 bt_t *bt; 240 241 VMEM_LOCK(vm); 242 if (vm->vm_nfreetags > BT_MINRESERVE) { 243 VMEM_UNLOCK(vm); 244 return 0; 245 } 246 247 mutex_enter(&vmem_btag_lock); 248 while (!LIST_EMPTY(&vmem_btag_freelist) && 249 vm->vm_nfreetags <= BT_MINRESERVE) { 250 bt = LIST_FIRST(&vmem_btag_freelist); 251 LIST_REMOVE(bt, bt_freelist); 252 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 253 vm->vm_nfreetags++; 254 vmem_btag_freelist_count--; 255 VMEM_EVCNT_INCR(static_bt_inuse); 256 } 257 mutex_exit(&vmem_btag_lock); 258 259 while (vm->vm_nfreetags <= BT_MINRESERVE) { 260 VMEM_UNLOCK(vm); 261 mutex_enter(&vmem_btag_refill_lock); 262 bt = pool_get(&vmem_btag_pool, PR_NOWAIT); 263 mutex_exit(&vmem_btag_refill_lock); 264 VMEM_LOCK(vm); 265 if (bt == NULL) 266 break; 267 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 268 vm->vm_nfreetags++; 269 } 270 271 if (vm->vm_nfreetags <= BT_MINRESERVE) { 272 VMEM_UNLOCK(vm); 273 return ENOMEM; 274 } 275 276 VMEM_UNLOCK(vm); 277 278 if (kmem_meta_arena != NULL) { 279 (void)bt_refill(kmem_arena); 280 (void)bt_refill(kmem_va_meta_arena); 281 (void)bt_refill(kmem_meta_arena); 282 } 283 284 return 0; 285 } 286 287 static bt_t * 288 bt_alloc(vmem_t *vm, vm_flag_t flags) 289 { 290 bt_t *bt; 291 VMEM_LOCK(vm); 292 while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) { 293 VMEM_UNLOCK(vm); 294 if (bt_refill(vm)) { 295 if ((flags & VM_NOSLEEP) != 0) { 296 return NULL; 297 } 298 299 /* 300 * It would be nice to wait for something specific here 301 * but there are multiple ways that a retry could 302 * succeed and we can't wait for multiple things 303 * simultaneously. So we'll just sleep for an arbitrary 304 * short period of time and retry regardless. 305 * This should be a very rare case. 306 */ 307 308 vmem_kick_pdaemon(); 309 kpause("btalloc", false, 1, NULL); 310 } 311 VMEM_LOCK(vm); 312 } 313 bt = LIST_FIRST(&vm->vm_freetags); 314 LIST_REMOVE(bt, bt_freelist); 315 vm->vm_nfreetags--; 316 VMEM_UNLOCK(vm); 317 318 return bt; 319 } 320 321 static void 322 bt_free(vmem_t *vm, bt_t *bt) 323 { 324 325 VMEM_LOCK(vm); 326 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 327 vm->vm_nfreetags++; 328 VMEM_UNLOCK(vm); 329 } 330 331 static void 332 bt_freetrim(vmem_t *vm, int freelimit) 333 { 334 bt_t *t; 335 LIST_HEAD(, vmem_btag) tofree; 336 337 LIST_INIT(&tofree); 338 339 VMEM_LOCK(vm); 340 while (vm->vm_nfreetags > freelimit) { 341 bt_t *bt = LIST_FIRST(&vm->vm_freetags); 342 LIST_REMOVE(bt, bt_freelist); 343 vm->vm_nfreetags--; 344 if (bt >= static_bts 345 && bt < &static_bts[STATIC_BT_COUNT]) { 346 mutex_enter(&vmem_btag_lock); 347 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 348 vmem_btag_freelist_count++; 349 mutex_exit(&vmem_btag_lock); 350 VMEM_EVCNT_DECR(static_bt_inuse); 351 } else { 352 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 353 } 354 } 355 356 VMEM_UNLOCK(vm); 357 while (!LIST_EMPTY(&tofree)) { 358 t = LIST_FIRST(&tofree); 359 LIST_REMOVE(t, bt_freelist); 360 pool_put(&vmem_btag_pool, t); 361 } 362 } 363 #endif /* defined(_KERNEL) */ 364 365 /* 366 * freelist[0] ... 
[1, 1] 367 * freelist[1] ... [2, 3] 368 * freelist[2] ... [4, 7] 369 * freelist[3] ... [8, 15] 370 * : 371 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1] 372 * : 373 */ 374 375 static struct vmem_freelist * 376 bt_freehead_tofree(vmem_t *vm, vmem_size_t size) 377 { 378 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 379 const int idx = SIZE2ORDER(qsize); 380 381 KASSERT(size != 0 && qsize != 0); 382 KASSERT((size & vm->vm_quantum_mask) == 0); 383 KASSERT(idx >= 0); 384 KASSERT(idx < VMEM_MAXORDER); 385 386 return &vm->vm_freelist[idx]; 387 } 388 389 /* 390 * bt_freehead_toalloc: return the freelist for the given size and allocation 391 * strategy. 392 * 393 * for VM_INSTANTFIT, return the list in which any blocks are large enough 394 * for the requested size. otherwise, return the list which can have blocks 395 * large enough for the requested size. 396 */ 397 398 static struct vmem_freelist * 399 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat) 400 { 401 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 402 int idx = SIZE2ORDER(qsize); 403 404 KASSERT(size != 0 && qsize != 0); 405 KASSERT((size & vm->vm_quantum_mask) == 0); 406 407 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) { 408 idx++; 409 /* check too large request? */ 410 } 411 KASSERT(idx >= 0); 412 KASSERT(idx < VMEM_MAXORDER); 413 414 return &vm->vm_freelist[idx]; 415 } 416 417 /* ---- boundary tag hash */ 418 419 static struct vmem_hashlist * 420 bt_hashhead(vmem_t *vm, vmem_addr_t addr) 421 { 422 struct vmem_hashlist *list; 423 unsigned int hash; 424 425 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT); 426 list = &vm->vm_hashlist[hash % vm->vm_hashsize]; 427 428 return list; 429 } 430 431 static bt_t * 432 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr) 433 { 434 struct vmem_hashlist *list; 435 bt_t *bt; 436 437 list = bt_hashhead(vm, addr); 438 LIST_FOREACH(bt, list, bt_hashlist) { 439 if (bt->bt_start == addr) { 440 break; 441 } 442 } 443 444 return bt; 445 } 446 447 static void 448 bt_rembusy(vmem_t *vm, bt_t *bt) 449 { 450 451 KASSERT(vm->vm_nbusytag > 0); 452 vm->vm_inuse -= bt->bt_size; 453 vm->vm_nbusytag--; 454 LIST_REMOVE(bt, bt_hashlist); 455 } 456 457 static void 458 bt_insbusy(vmem_t *vm, bt_t *bt) 459 { 460 struct vmem_hashlist *list; 461 462 KASSERT(bt->bt_type == BT_TYPE_BUSY); 463 464 list = bt_hashhead(vm, bt->bt_start); 465 LIST_INSERT_HEAD(list, bt, bt_hashlist); 466 vm->vm_nbusytag++; 467 vm->vm_inuse += bt->bt_size; 468 } 469 470 /* ---- boundary tag list */ 471 472 static void 473 bt_remseg(vmem_t *vm, bt_t *bt) 474 { 475 476 TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist); 477 } 478 479 static void 480 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev) 481 { 482 483 TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist); 484 } 485 486 static void 487 bt_insseg_tail(vmem_t *vm, bt_t *bt) 488 { 489 490 TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist); 491 } 492 493 static void 494 bt_remfree(vmem_t *vm, bt_t *bt) 495 { 496 497 KASSERT(bt->bt_type == BT_TYPE_FREE); 498 499 LIST_REMOVE(bt, bt_freelist); 500 } 501 502 static void 503 bt_insfree(vmem_t *vm, bt_t *bt) 504 { 505 struct vmem_freelist *list; 506 507 list = bt_freehead_tofree(vm, bt->bt_size); 508 LIST_INSERT_HEAD(list, bt, bt_freelist); 509 } 510 511 /* ---- vmem internal functions */ 512 513 #if defined(QCACHE) 514 static inline vm_flag_t 515 prf_to_vmf(int prflags) 516 { 517 vm_flag_t vmflags; 518 519 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0); 520 if ((prflags & PR_WAITOK) != 0) 
{ 521 vmflags = VM_SLEEP; 522 } else { 523 vmflags = VM_NOSLEEP; 524 } 525 return vmflags; 526 } 527 528 static inline int 529 vmf_to_prf(vm_flag_t vmflags) 530 { 531 int prflags; 532 533 if ((vmflags & VM_SLEEP) != 0) { 534 prflags = PR_WAITOK; 535 } else { 536 prflags = PR_NOWAIT; 537 } 538 return prflags; 539 } 540 541 static size_t 542 qc_poolpage_size(size_t qcache_max) 543 { 544 int i; 545 546 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) { 547 /* nothing */ 548 } 549 return ORDER2SIZE(i); 550 } 551 552 static void * 553 qc_poolpage_alloc(struct pool *pool, int prflags) 554 { 555 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 556 vmem_t *vm = qc->qc_vmem; 557 vmem_addr_t addr; 558 559 if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz, 560 prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0) 561 return NULL; 562 return (void *)addr; 563 } 564 565 static void 566 qc_poolpage_free(struct pool *pool, void *addr) 567 { 568 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 569 vmem_t *vm = qc->qc_vmem; 570 571 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz); 572 } 573 574 static void 575 qc_init(vmem_t *vm, size_t qcache_max, int ipl) 576 { 577 qcache_t *prevqc; 578 struct pool_allocator *pa; 579 int qcache_idx_max; 580 int i; 581 582 KASSERT((qcache_max & vm->vm_quantum_mask) == 0); 583 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) { 584 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift; 585 } 586 vm->vm_qcache_max = qcache_max; 587 pa = &vm->vm_qcache_allocator; 588 memset(pa, 0, sizeof(*pa)); 589 pa->pa_alloc = qc_poolpage_alloc; 590 pa->pa_free = qc_poolpage_free; 591 pa->pa_pagesz = qc_poolpage_size(qcache_max); 592 593 qcache_idx_max = qcache_max >> vm->vm_quantum_shift; 594 prevqc = NULL; 595 for (i = qcache_idx_max; i > 0; i--) { 596 qcache_t *qc = &vm->vm_qcache_store[i - 1]; 597 size_t size = i << vm->vm_quantum_shift; 598 pool_cache_t pc; 599 600 qc->qc_vmem = vm; 601 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu", 602 vm->vm_name, size); 603 604 pc = pool_cache_init(size, 605 ORDER2SIZE(vm->vm_quantum_shift), 0, 606 PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */, 607 qc->qc_name, pa, ipl, NULL, NULL, NULL); 608 609 KASSERT(pc); 610 611 qc->qc_cache = pc; 612 KASSERT(qc->qc_cache != NULL); /* XXX */ 613 if (prevqc != NULL && 614 qc->qc_cache->pc_pool.pr_itemsperpage == 615 prevqc->qc_cache->pc_pool.pr_itemsperpage) { 616 pool_cache_destroy(qc->qc_cache); 617 vm->vm_qcache[i - 1] = prevqc; 618 continue; 619 } 620 qc->qc_cache->pc_pool.pr_qcache = qc; 621 vm->vm_qcache[i - 1] = qc; 622 prevqc = qc; 623 } 624 } 625 626 static void 627 qc_destroy(vmem_t *vm) 628 { 629 const qcache_t *prevqc; 630 int i; 631 int qcache_idx_max; 632 633 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift; 634 prevqc = NULL; 635 for (i = 0; i < qcache_idx_max; i++) { 636 qcache_t *qc = vm->vm_qcache[i]; 637 638 if (prevqc == qc) { 639 continue; 640 } 641 pool_cache_destroy(qc->qc_cache); 642 prevqc = qc; 643 } 644 } 645 #endif 646 647 #if defined(_KERNEL) 648 static void 649 vmem_bootstrap(void) 650 { 651 652 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM); 653 mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM); 654 mutex_init(&vmem_btag_refill_lock, MUTEX_DEFAULT, IPL_VM); 655 656 while (static_bt_count-- > 0) { 657 bt_t *bt = &static_bts[static_bt_count]; 658 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 659 VMEM_EVCNT_INCR(static_bt_count); 660 vmem_btag_freelist_count++; 661 } 662 vmem_bootstrapped = TRUE; 663 } 664 665 void 666 vmem_subsystem_init(vmem_t 
*vm) 667 { 668 669 kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va", 670 0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm, 671 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT, 672 IPL_VM); 673 674 kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta", 675 0, 0, PAGE_SIZE, 676 uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena, 677 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); 678 679 pool_init(&vmem_btag_pool, sizeof(bt_t), 0, 0, PR_PHINPAGE, 680 "vmembt", &pool_allocator_vmem_meta, IPL_VM); 681 } 682 #endif /* defined(_KERNEL) */ 683 684 static int 685 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags, 686 int spanbttype) 687 { 688 bt_t *btspan; 689 bt_t *btfree; 690 691 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 692 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 693 KASSERT(spanbttype == BT_TYPE_SPAN || 694 spanbttype == BT_TYPE_SPAN_STATIC); 695 696 btspan = bt_alloc(vm, flags); 697 if (btspan == NULL) { 698 return ENOMEM; 699 } 700 btfree = bt_alloc(vm, flags); 701 if (btfree == NULL) { 702 bt_free(vm, btspan); 703 return ENOMEM; 704 } 705 706 btspan->bt_type = spanbttype; 707 btspan->bt_start = addr; 708 btspan->bt_size = size; 709 710 btfree->bt_type = BT_TYPE_FREE; 711 btfree->bt_start = addr; 712 btfree->bt_size = size; 713 714 VMEM_LOCK(vm); 715 bt_insseg_tail(vm, btspan); 716 bt_insseg(vm, btfree, btspan); 717 bt_insfree(vm, btfree); 718 vm->vm_size += size; 719 VMEM_UNLOCK(vm); 720 721 return 0; 722 } 723 724 static void 725 vmem_destroy1(vmem_t *vm) 726 { 727 728 #if defined(QCACHE) 729 qc_destroy(vm); 730 #endif /* defined(QCACHE) */ 731 if (vm->vm_hashlist != NULL) { 732 int i; 733 734 for (i = 0; i < vm->vm_hashsize; i++) { 735 bt_t *bt; 736 737 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) { 738 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC); 739 bt_free(vm, bt); 740 } 741 } 742 if (vm->vm_hashlist != &vm->vm_hash0) { 743 xfree(vm->vm_hashlist, 744 sizeof(struct vmem_hashlist *) * vm->vm_hashsize); 745 } 746 } 747 748 bt_freetrim(vm, 0); 749 750 VMEM_CONDVAR_DESTROY(vm); 751 VMEM_LOCK_DESTROY(vm); 752 xfree(vm, sizeof(*vm)); 753 } 754 755 static int 756 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags) 757 { 758 vmem_addr_t addr; 759 int rc; 760 761 if (vm->vm_importfn == NULL) { 762 return EINVAL; 763 } 764 765 if (vm->vm_flags & VM_LARGEIMPORT) { 766 size *= 16; 767 } 768 769 if (vm->vm_flags & VM_XIMPORT) { 770 rc = __FPTRCAST(vmem_ximport_t *, vm->vm_importfn)(vm->vm_arg, 771 size, &size, flags, &addr); 772 } else { 773 rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr); 774 } 775 if (rc) { 776 return ENOMEM; 777 } 778 779 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) { 780 (*vm->vm_releasefn)(vm->vm_arg, addr, size); 781 return ENOMEM; 782 } 783 784 return 0; 785 } 786 787 static int 788 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags) 789 { 790 bt_t *bt; 791 int i; 792 struct vmem_hashlist *newhashlist; 793 struct vmem_hashlist *oldhashlist; 794 size_t oldhashsize; 795 796 KASSERT(newhashsize > 0); 797 798 newhashlist = 799 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags); 800 if (newhashlist == NULL) { 801 return ENOMEM; 802 } 803 for (i = 0; i < newhashsize; i++) { 804 LIST_INIT(&newhashlist[i]); 805 } 806 807 if (!VMEM_TRYLOCK(vm)) { 808 xfree(newhashlist, 809 sizeof(struct vmem_hashlist *) * newhashsize); 810 return EBUSY; 811 } 812 oldhashlist = vm->vm_hashlist; 813 oldhashsize = vm->vm_hashsize; 814 vm->vm_hashlist = newhashlist; 815 vm->vm_hashsize = 
newhashsize; 816 if (oldhashlist == NULL) { 817 VMEM_UNLOCK(vm); 818 return 0; 819 } 820 for (i = 0; i < oldhashsize; i++) { 821 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) { 822 bt_rembusy(vm, bt); /* XXX */ 823 bt_insbusy(vm, bt); 824 } 825 } 826 VMEM_UNLOCK(vm); 827 828 if (oldhashlist != &vm->vm_hash0) { 829 xfree(oldhashlist, 830 sizeof(struct vmem_hashlist *) * oldhashsize); 831 } 832 833 return 0; 834 } 835 836 /* 837 * vmem_fit: check if a bt can satisfy the given restrictions. 838 * 839 * it's a caller's responsibility to ensure the region is big enough 840 * before calling us. 841 */ 842 843 static int 844 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, 845 vmem_size_t phase, vmem_size_t nocross, 846 vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp) 847 { 848 vmem_addr_t start; 849 vmem_addr_t end; 850 851 KASSERT(size > 0); 852 KASSERT(bt->bt_size >= size); /* caller's responsibility */ 853 854 /* 855 * XXX assumption: vmem_addr_t and vmem_size_t are 856 * unsigned integer of the same size. 857 */ 858 859 start = bt->bt_start; 860 if (start < minaddr) { 861 start = minaddr; 862 } 863 end = BT_END(bt); 864 if (end > maxaddr) { 865 end = maxaddr; 866 } 867 if (start > end) { 868 return ENOMEM; 869 } 870 871 start = VMEM_ALIGNUP(start - phase, align) + phase; 872 if (start < bt->bt_start) { 873 start += align; 874 } 875 if (VMEM_CROSS_P(start, start + size - 1, nocross)) { 876 KASSERT(align < nocross); 877 start = VMEM_ALIGNUP(start - phase, nocross) + phase; 878 } 879 if (start <= end && end - start >= size - 1) { 880 KASSERT((start & (align - 1)) == phase); 881 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross)); 882 KASSERT(minaddr <= start); 883 KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr); 884 KASSERT(bt->bt_start <= start); 885 KASSERT(BT_END(bt) - start >= size - 1); 886 *addrp = start; 887 return 0; 888 } 889 return ENOMEM; 890 } 891 892 /* ---- vmem API */ 893 894 /* 895 * vmem_create_internal: creates a vmem arena. 896 */ 897 898 vmem_t * 899 vmem_init(vmem_t *vm, const char *name, 900 vmem_addr_t base, vmem_size_t size, vmem_size_t quantum, 901 vmem_import_t *importfn, vmem_release_t *releasefn, 902 vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 903 { 904 int i; 905 906 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 907 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 908 KASSERT(quantum > 0); 909 910 #if defined(_KERNEL) 911 /* XXX: SMP, we get called early... 
*/ 912 if (!vmem_bootstrapped) { 913 vmem_bootstrap(); 914 } 915 #endif /* defined(_KERNEL) */ 916 917 if (vm == NULL) { 918 vm = xmalloc(sizeof(*vm), flags); 919 } 920 if (vm == NULL) { 921 return NULL; 922 } 923 924 VMEM_CONDVAR_INIT(vm, "vmem"); 925 VMEM_LOCK_INIT(vm, ipl); 926 vm->vm_flags = flags; 927 vm->vm_nfreetags = 0; 928 LIST_INIT(&vm->vm_freetags); 929 strlcpy(vm->vm_name, name, sizeof(vm->vm_name)); 930 vm->vm_quantum_mask = quantum - 1; 931 vm->vm_quantum_shift = SIZE2ORDER(quantum); 932 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum); 933 vm->vm_importfn = importfn; 934 vm->vm_releasefn = releasefn; 935 vm->vm_arg = arg; 936 vm->vm_nbusytag = 0; 937 vm->vm_size = 0; 938 vm->vm_inuse = 0; 939 #if defined(QCACHE) 940 qc_init(vm, qcache_max, ipl); 941 #endif /* defined(QCACHE) */ 942 943 TAILQ_INIT(&vm->vm_seglist); 944 for (i = 0; i < VMEM_MAXORDER; i++) { 945 LIST_INIT(&vm->vm_freelist[i]); 946 } 947 memset(&vm->vm_hash0, 0, sizeof(struct vmem_hashlist)); 948 vm->vm_hashsize = 1; 949 vm->vm_hashlist = &vm->vm_hash0; 950 951 if (size != 0) { 952 if (vmem_add(vm, base, size, flags) != 0) { 953 vmem_destroy1(vm); 954 return NULL; 955 } 956 } 957 958 #if defined(_KERNEL) 959 if (flags & VM_BOOTSTRAP) { 960 bt_refill(vm); 961 } 962 963 mutex_enter(&vmem_list_lock); 964 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist); 965 mutex_exit(&vmem_list_lock); 966 #endif /* defined(_KERNEL) */ 967 968 return vm; 969 } 970 971 972 973 /* 974 * vmem_create: create an arena. 975 * 976 * => must not be called from interrupt context. 977 */ 978 979 vmem_t * 980 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, 981 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, 982 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 983 { 984 985 KASSERT((flags & (VM_XIMPORT)) == 0); 986 987 return vmem_init(NULL, name, base, size, quantum, 988 importfn, releasefn, source, qcache_max, flags, ipl); 989 } 990 991 /* 992 * vmem_xcreate: create an arena that takes an alternative import function. 993 * 994 * => must not be called from interrupt context. 995 */ 996 997 vmem_t * 998 vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size, 999 vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn, 1000 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 1001 { 1002 1003 KASSERT((flags & (VM_XIMPORT)) == 0); 1004 1005 return vmem_init(NULL, name, base, size, quantum, 1006 __FPTRCAST(vmem_import_t *, importfn), releasefn, source, 1007 qcache_max, flags | VM_XIMPORT, ipl); 1008 } 1009 1010 void 1011 vmem_destroy(vmem_t *vm) 1012 { 1013 1014 #if defined(_KERNEL) 1015 mutex_enter(&vmem_list_lock); 1016 LIST_REMOVE(vm, vm_alllist); 1017 mutex_exit(&vmem_list_lock); 1018 #endif /* defined(_KERNEL) */ 1019 1020 vmem_destroy1(vm); 1021 } 1022 1023 vmem_size_t 1024 vmem_roundup_size(vmem_t *vm, vmem_size_t size) 1025 { 1026 1027 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask; 1028 } 1029 1030 /* 1031 * vmem_alloc: allocate resource from the arena.
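 *
 * A minimal usage sketch (illustrative only; the arena name, base and
 * size below are made-up example values, not taken from this file):
 *
 *	vmem_t *arena;
 *	vmem_addr_t va;
 *
 *	arena = vmem_create("example", 0x10000, 0x100000, PAGE_SIZE,
 *	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
 *	if (vmem_alloc(arena, 3 * PAGE_SIZE,
 *	    VM_SLEEP | VM_INSTANTFIT, &va) == 0) {
 *		... use [va, va + 3 * PAGE_SIZE) ...
 *		vmem_free(arena, va, 3 * PAGE_SIZE);
 *	}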
1032 */ 1033 1034 int 1035 vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp) 1036 { 1037 const vm_flag_t strat __diagused = flags & VM_FITMASK; 1038 int error; 1039 1040 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1041 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1042 1043 KASSERT(size > 0); 1044 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1045 if ((flags & VM_SLEEP) != 0) { 1046 ASSERT_SLEEPABLE(); 1047 } 1048 1049 #if defined(QCACHE) 1050 if (size <= vm->vm_qcache_max) { 1051 void *p; 1052 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1053 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1054 1055 p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags)); 1056 if (addrp != NULL) 1057 *addrp = (vmem_addr_t)p; 1058 error = (p == NULL) ? ENOMEM : 0; 1059 goto out; 1060 } 1061 #endif /* defined(QCACHE) */ 1062 1063 error = vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, 1064 flags, addrp); 1065 out: 1066 KASSERT(error == 0 || (flags & VM_SLEEP) == 0); 1067 return error; 1068 } 1069 1070 int 1071 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align, 1072 const vmem_size_t phase, const vmem_size_t nocross, 1073 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags, 1074 vmem_addr_t *addrp) 1075 { 1076 struct vmem_freelist *list; 1077 struct vmem_freelist *first; 1078 struct vmem_freelist *end; 1079 bt_t *bt; 1080 bt_t *btnew; 1081 bt_t *btnew2; 1082 const vmem_size_t size = vmem_roundup_size(vm, size0); 1083 vm_flag_t strat = flags & VM_FITMASK; 1084 vmem_addr_t start; 1085 int rc; 1086 1087 KASSERT(size0 > 0); 1088 KASSERT(size > 0); 1089 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1090 if ((flags & VM_SLEEP) != 0) { 1091 ASSERT_SLEEPABLE(); 1092 } 1093 KASSERT((align & vm->vm_quantum_mask) == 0); 1094 KASSERT((align & (align - 1)) == 0); 1095 KASSERT((phase & vm->vm_quantum_mask) == 0); 1096 KASSERT((nocross & vm->vm_quantum_mask) == 0); 1097 KASSERT((nocross & (nocross - 1)) == 0); 1098 KASSERT((align == 0 && phase == 0) || phase < align); 1099 KASSERT(nocross == 0 || nocross >= size); 1100 KASSERT(minaddr <= maxaddr); 1101 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross)); 1102 1103 if (align == 0) { 1104 align = vm->vm_quantum_mask + 1; 1105 } 1106 1107 /* 1108 * allocate boundary tags before acquiring the vmem lock. 1109 */ 1110 btnew = bt_alloc(vm, flags); 1111 if (btnew == NULL) { 1112 return ENOMEM; 1113 } 1114 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */ 1115 if (btnew2 == NULL) { 1116 bt_free(vm, btnew); 1117 return ENOMEM; 1118 } 1119 1120 /* 1121 * choose a free block from which we allocate. 1122 */ 1123 retry_strat: 1124 first = bt_freehead_toalloc(vm, size, strat); 1125 end = &vm->vm_freelist[VMEM_MAXORDER]; 1126 retry: 1127 bt = NULL; 1128 VMEM_LOCK(vm); 1129 vmem_check(vm); 1130 if (strat == VM_INSTANTFIT) { 1131 /* 1132 * just choose the first block which satisfies our restrictions. 1133 * 1134 * note that we don't need to check the size of the blocks 1135 * because any blocks found on these list should be larger than 1136 * the given size. 1137 */ 1138 for (list = first; list < end; list++) { 1139 bt = LIST_FIRST(list); 1140 if (bt != NULL) { 1141 rc = vmem_fit(bt, size, align, phase, 1142 nocross, minaddr, maxaddr, &start); 1143 if (rc == 0) { 1144 goto gotit; 1145 } 1146 /* 1147 * don't bother to follow the bt_freelist link 1148 * here. the list can be very long and we are 1149 * told to run fast. 
blocks from the later free 1150 * lists are larger and have better chances to 1151 * satisfy our restrictions. 1152 */ 1153 } 1154 } 1155 } else { /* VM_BESTFIT */ 1156 /* 1157 * we assume that, for space efficiency, it's better to 1158 * allocate from a smaller block. thus we will start searching 1159 * from the lower-order list than VM_INSTANTFIT. 1160 * however, don't bother to find the smallest block in a free 1161 * list because the list can be very long. we can revisit it 1162 * if/when it turns out to be a problem. 1163 * 1164 * note that the 'first' list can contain blocks smaller than 1165 * the requested size. thus we need to check bt_size. 1166 */ 1167 for (list = first; list < end; list++) { 1168 LIST_FOREACH(bt, list, bt_freelist) { 1169 if (bt->bt_size >= size) { 1170 rc = vmem_fit(bt, size, align, phase, 1171 nocross, minaddr, maxaddr, &start); 1172 if (rc == 0) { 1173 goto gotit; 1174 } 1175 } 1176 } 1177 } 1178 } 1179 VMEM_UNLOCK(vm); 1180 #if 1 1181 if (strat == VM_INSTANTFIT) { 1182 strat = VM_BESTFIT; 1183 goto retry_strat; 1184 } 1185 #endif 1186 if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) { 1187 1188 /* 1189 * XXX should try to import a region large enough to 1190 * satisfy restrictions? 1191 */ 1192 1193 goto fail; 1194 } 1195 /* XXX eeek, minaddr & maxaddr not respected */ 1196 if (vmem_import(vm, size, flags) == 0) { 1197 goto retry; 1198 } 1199 /* XXX */ 1200 1201 if ((flags & VM_SLEEP) != 0) { 1202 vmem_kick_pdaemon(); 1203 VMEM_LOCK(vm); 1204 VMEM_CONDVAR_WAIT(vm); 1205 VMEM_UNLOCK(vm); 1206 goto retry; 1207 } 1208 fail: 1209 bt_free(vm, btnew); 1210 bt_free(vm, btnew2); 1211 return ENOMEM; 1212 1213 gotit: 1214 KASSERT(bt->bt_type == BT_TYPE_FREE); 1215 KASSERT(bt->bt_size >= size); 1216 bt_remfree(vm, bt); 1217 vmem_check(vm); 1218 if (bt->bt_start != start) { 1219 btnew2->bt_type = BT_TYPE_FREE; 1220 btnew2->bt_start = bt->bt_start; 1221 btnew2->bt_size = start - bt->bt_start; 1222 bt->bt_start = start; 1223 bt->bt_size -= btnew2->bt_size; 1224 bt_insfree(vm, btnew2); 1225 bt_insseg(vm, btnew2, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); 1226 btnew2 = NULL; 1227 vmem_check(vm); 1228 } 1229 KASSERT(bt->bt_start == start); 1230 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) { 1231 /* split */ 1232 btnew->bt_type = BT_TYPE_BUSY; 1233 btnew->bt_start = bt->bt_start; 1234 btnew->bt_size = size; 1235 bt->bt_start = bt->bt_start + size; 1236 bt->bt_size -= size; 1237 bt_insfree(vm, bt); 1238 bt_insseg(vm, btnew, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); 1239 bt_insbusy(vm, btnew); 1240 vmem_check(vm); 1241 VMEM_UNLOCK(vm); 1242 } else { 1243 bt->bt_type = BT_TYPE_BUSY; 1244 bt_insbusy(vm, bt); 1245 vmem_check(vm); 1246 VMEM_UNLOCK(vm); 1247 bt_free(vm, btnew); 1248 btnew = bt; 1249 } 1250 if (btnew2 != NULL) { 1251 bt_free(vm, btnew2); 1252 } 1253 KASSERT(btnew->bt_size >= size); 1254 btnew->bt_type = BT_TYPE_BUSY; 1255 1256 if (addrp != NULL) 1257 *addrp = btnew->bt_start; 1258 return 0; 1259 } 1260 1261 /* 1262 * vmem_free: free the resource to the arena. 
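 *
 * With QCACHE enabled, requests up to vm_qcache_max are returned to the
 * per-size pool cache; anything larger falls through to vmem_xfree().
 * The size passed here should match the original allocation (vmem_xfree()
 * asserts this, modulo quantum rounding), e.g. continuing the illustrative
 * sketch above:
 *
 *	vmem_free(arena, va, 3 * PAGE_SIZE);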
1263 */ 1264 1265 void 1266 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1267 { 1268 1269 KASSERT(size > 0); 1270 1271 #if defined(QCACHE) 1272 if (size <= vm->vm_qcache_max) { 1273 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1274 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1275 1276 pool_cache_put(qc->qc_cache, (void *)addr); 1277 return; 1278 } 1279 #endif /* defined(QCACHE) */ 1280 1281 vmem_xfree(vm, addr, size); 1282 } 1283 1284 void 1285 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1286 { 1287 bt_t *bt; 1288 bt_t *t; 1289 LIST_HEAD(, vmem_btag) tofree; 1290 1291 LIST_INIT(&tofree); 1292 1293 KASSERT(size > 0); 1294 1295 VMEM_LOCK(vm); 1296 1297 bt = bt_lookupbusy(vm, addr); 1298 KASSERT(bt != NULL); 1299 KASSERT(bt->bt_start == addr); 1300 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) || 1301 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask); 1302 KASSERT(bt->bt_type == BT_TYPE_BUSY); 1303 bt_rembusy(vm, bt); 1304 bt->bt_type = BT_TYPE_FREE; 1305 1306 /* coalesce */ 1307 t = TAILQ_NEXT(bt, bt_seglist); 1308 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1309 KASSERT(BT_END(bt) < t->bt_start); /* YYY */ 1310 bt_remfree(vm, t); 1311 bt_remseg(vm, t); 1312 bt->bt_size += t->bt_size; 1313 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1314 } 1315 t = TAILQ_PREV(bt, vmem_seglist, bt_seglist); 1316 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1317 KASSERT(BT_END(t) < bt->bt_start); /* YYY */ 1318 bt_remfree(vm, t); 1319 bt_remseg(vm, t); 1320 bt->bt_size += t->bt_size; 1321 bt->bt_start = t->bt_start; 1322 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1323 } 1324 1325 t = TAILQ_PREV(bt, vmem_seglist, bt_seglist); 1326 KASSERT(t != NULL); 1327 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY); 1328 if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN && 1329 t->bt_size == bt->bt_size) { 1330 vmem_addr_t spanaddr; 1331 vmem_size_t spansize; 1332 1333 KASSERT(t->bt_start == bt->bt_start); 1334 spanaddr = bt->bt_start; 1335 spansize = bt->bt_size; 1336 bt_remseg(vm, bt); 1337 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 1338 bt_remseg(vm, t); 1339 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1340 vm->vm_size -= spansize; 1341 VMEM_CONDVAR_BROADCAST(vm); 1342 VMEM_UNLOCK(vm); 1343 (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize); 1344 } else { 1345 bt_insfree(vm, bt); 1346 VMEM_CONDVAR_BROADCAST(vm); 1347 VMEM_UNLOCK(vm); 1348 } 1349 1350 while (!LIST_EMPTY(&tofree)) { 1351 t = LIST_FIRST(&tofree); 1352 LIST_REMOVE(t, bt_freelist); 1353 bt_free(vm, t); 1354 } 1355 1356 bt_freetrim(vm, BT_MAXFREE); 1357 } 1358 1359 /* 1360 * vmem_add: 1361 * 1362 * => caller must ensure appropriate spl, 1363 * if the arena can be accessed from interrupt context. 
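 *
 * An illustrative sketch (the addresses are made-up example values): an
 * arena created with size 0 starts out empty and is populated with spans
 * here, much as the UNITTEST code below does:
 *
 *	vm = vmem_create("ranges", 0, 0, 1, NULL, NULL, NULL, 0,
 *	    VM_SLEEP, IPL_NONE);
 *	error = vmem_add(vm, 0x1000, 0x4000, VM_SLEEP);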
1364 */ 1365 1366 int 1367 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags) 1368 { 1369 1370 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC); 1371 } 1372 1373 /* 1374 * vmem_size: information about arenas size 1375 * 1376 * => return free/allocated size in arena 1377 */ 1378 vmem_size_t 1379 vmem_size(vmem_t *vm, int typemask) 1380 { 1381 1382 switch (typemask) { 1383 case VMEM_ALLOC: 1384 return vm->vm_inuse; 1385 case VMEM_FREE: 1386 return vm->vm_size - vm->vm_inuse; 1387 case VMEM_FREE|VMEM_ALLOC: 1388 return vm->vm_size; 1389 default: 1390 panic("vmem_size"); 1391 } 1392 } 1393 1394 /* ---- rehash */ 1395 1396 #if defined(_KERNEL) 1397 static struct callout vmem_rehash_ch; 1398 static int vmem_rehash_interval; 1399 static struct workqueue *vmem_rehash_wq; 1400 static struct work vmem_rehash_wk; 1401 1402 static void 1403 vmem_rehash_all(struct work *wk, void *dummy) 1404 { 1405 vmem_t *vm; 1406 1407 KASSERT(wk == &vmem_rehash_wk); 1408 mutex_enter(&vmem_list_lock); 1409 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1410 size_t desired; 1411 size_t current; 1412 1413 if (!VMEM_TRYLOCK(vm)) { 1414 continue; 1415 } 1416 desired = vm->vm_nbusytag; 1417 current = vm->vm_hashsize; 1418 VMEM_UNLOCK(vm); 1419 1420 if (desired > VMEM_HASHSIZE_MAX) { 1421 desired = VMEM_HASHSIZE_MAX; 1422 } else if (desired < VMEM_HASHSIZE_MIN) { 1423 desired = VMEM_HASHSIZE_MIN; 1424 } 1425 if (desired > current * 2 || desired * 2 < current) { 1426 vmem_rehash(vm, desired, VM_NOSLEEP); 1427 } 1428 } 1429 mutex_exit(&vmem_list_lock); 1430 1431 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1432 } 1433 1434 static void 1435 vmem_rehash_all_kick(void *dummy) 1436 { 1437 1438 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL); 1439 } 1440 1441 void 1442 vmem_rehash_start(void) 1443 { 1444 int error; 1445 1446 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash", 1447 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE); 1448 if (error) { 1449 panic("%s: workqueue_create %d\n", __func__, error); 1450 } 1451 callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE); 1452 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL); 1453 1454 vmem_rehash_interval = hz * 10; 1455 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1456 } 1457 #endif /* defined(_KERNEL) */ 1458 1459 /* ---- debug */ 1460 1461 #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) 1462 1463 static void bt_dump(const bt_t *, void (*)(const char *, ...) 1464 __printflike(1, 2)); 1465 1466 static const char * 1467 bt_type_string(int type) 1468 { 1469 static const char * const table[] = { 1470 [BT_TYPE_BUSY] = "busy", 1471 [BT_TYPE_FREE] = "free", 1472 [BT_TYPE_SPAN] = "span", 1473 [BT_TYPE_SPAN_STATIC] = "static span", 1474 }; 1475 1476 if (type >= __arraycount(table)) { 1477 return "BOGUS"; 1478 } 1479 return table[type]; 1480 } 1481 1482 static void 1483 bt_dump(const bt_t *bt, void (*pr)(const char *, ...)) 1484 { 1485 1486 (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n", 1487 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size, 1488 bt->bt_type, bt_type_string(bt->bt_type)); 1489 } 1490 1491 static void 1492 vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...) 
__printflike(1, 2)) 1493 { 1494 const bt_t *bt; 1495 int i; 1496 1497 (*pr)("vmem %p '%s'\n", vm, vm->vm_name); 1498 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1499 bt_dump(bt, pr); 1500 } 1501 1502 for (i = 0; i < VMEM_MAXORDER; i++) { 1503 const struct vmem_freelist *fl = &vm->vm_freelist[i]; 1504 1505 if (LIST_EMPTY(fl)) { 1506 continue; 1507 } 1508 1509 (*pr)("freelist[%d]\n", i); 1510 LIST_FOREACH(bt, fl, bt_freelist) { 1511 bt_dump(bt, pr); 1512 } 1513 } 1514 } 1515 1516 #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */ 1517 1518 #if defined(DDB) 1519 static bt_t * 1520 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr) 1521 { 1522 bt_t *bt; 1523 1524 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1525 if (BT_ISSPAN_P(bt)) { 1526 continue; 1527 } 1528 if (bt->bt_start <= addr && addr <= BT_END(bt)) { 1529 return bt; 1530 } 1531 } 1532 1533 return NULL; 1534 } 1535 1536 void 1537 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 1538 { 1539 vmem_t *vm; 1540 1541 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1542 bt_t *bt; 1543 1544 bt = vmem_whatis_lookup(vm, addr); 1545 if (bt == NULL) { 1546 continue; 1547 } 1548 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n", 1549 (void *)addr, (void *)bt->bt_start, 1550 (size_t)(addr - bt->bt_start), vm->vm_name, 1551 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free"); 1552 } 1553 } 1554 1555 void 1556 vmem_printall(const char *modif, void (*pr)(const char *, ...)) 1557 { 1558 const vmem_t *vm; 1559 1560 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1561 vmem_dump(vm, pr); 1562 } 1563 } 1564 1565 void 1566 vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...)) 1567 { 1568 const vmem_t *vm = (const void *)addr; 1569 1570 vmem_dump(vm, pr); 1571 } 1572 #endif /* defined(DDB) */ 1573 1574 #if defined(_KERNEL) 1575 #define vmem_printf printf 1576 #else 1577 #include <stdio.h> 1578 #include <stdarg.h> 1579 1580 static void 1581 vmem_printf(const char *fmt, ...) 
1582 { 1583 va_list ap; 1584 va_start(ap, fmt); 1585 vprintf(fmt, ap); 1586 va_end(ap); 1587 } 1588 #endif 1589 1590 #if defined(VMEM_SANITY) 1591 1592 static bool 1593 vmem_check_sanity(vmem_t *vm) 1594 { 1595 const bt_t *bt, *bt2; 1596 1597 KASSERT(vm != NULL); 1598 1599 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1600 if (bt->bt_start > BT_END(bt)) { 1601 printf("corrupted tag\n"); 1602 bt_dump(bt, vmem_printf); 1603 return false; 1604 } 1605 } 1606 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1607 TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1608 if (bt == bt2) { 1609 continue; 1610 } 1611 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1612 continue; 1613 } 1614 if (bt->bt_start <= BT_END(bt2) && 1615 bt2->bt_start <= BT_END(bt)) { 1616 printf("overwrapped tags\n"); 1617 bt_dump(bt, vmem_printf); 1618 bt_dump(bt2, vmem_printf); 1619 return false; 1620 } 1621 } 1622 } 1623 1624 return true; 1625 } 1626 1627 static void 1628 vmem_check(vmem_t *vm) 1629 { 1630 1631 if (!vmem_check_sanity(vm)) { 1632 panic("insanity vmem %p", vm); 1633 } 1634 } 1635 1636 #endif /* defined(VMEM_SANITY) */ 1637 1638 #if defined(UNITTEST) 1639 int 1640 main(void) 1641 { 1642 int rc; 1643 vmem_t *vm; 1644 vmem_addr_t p; 1645 struct reg { 1646 vmem_addr_t p; 1647 vmem_size_t sz; 1648 bool x; 1649 } *reg = NULL; 1650 int nreg = 0; 1651 int nalloc = 0; 1652 int nfree = 0; 1653 vmem_size_t total = 0; 1654 #if 1 1655 vm_flag_t strat = VM_INSTANTFIT; 1656 #else 1657 vm_flag_t strat = VM_BESTFIT; 1658 #endif 1659 1660 vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP, 1661 #ifdef _KERNEL 1662 IPL_NONE 1663 #else 1664 0 1665 #endif 1666 ); 1667 if (vm == NULL) { 1668 printf("vmem_create\n"); 1669 exit(EXIT_FAILURE); 1670 } 1671 vmem_dump(vm, vmem_printf); 1672 1673 rc = vmem_add(vm, 0, 50, VM_SLEEP); 1674 assert(rc == 0); 1675 rc = vmem_add(vm, 100, 200, VM_SLEEP); 1676 assert(rc == 0); 1677 rc = vmem_add(vm, 2000, 1, VM_SLEEP); 1678 assert(rc == 0); 1679 rc = vmem_add(vm, 40000, 65536, VM_SLEEP); 1680 assert(rc == 0); 1681 rc = vmem_add(vm, 10000, 10000, VM_SLEEP); 1682 assert(rc == 0); 1683 rc = vmem_add(vm, 500, 1000, VM_SLEEP); 1684 assert(rc == 0); 1685 rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP); 1686 assert(rc == 0); 1687 rc = vmem_xalloc(vm, 0x101, 0, 0, 0, 1688 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1689 assert(rc != 0); 1690 rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p); 1691 assert(rc == 0 && p == 0); 1692 vmem_xfree(vm, p, 50); 1693 rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p); 1694 assert(rc == 0 && p == 0); 1695 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1696 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p); 1697 assert(rc != 0); 1698 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1699 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p); 1700 assert(rc != 0); 1701 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1702 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1703 assert(rc == 0); 1704 vmem_dump(vm, vmem_printf); 1705 for (;;) { 1706 struct reg *r; 1707 int t = rand() % 100; 1708 1709 if (t > 45) { 1710 /* alloc */ 1711 vmem_size_t sz = rand() % 500 + 1; 1712 bool x; 1713 vmem_size_t align, phase, nocross; 1714 vmem_addr_t minaddr, maxaddr; 1715 1716 if (t > 70) { 1717 x = true; 1718 /* XXX */ 1719 align = 1 << (rand() % 15); 1720 phase = rand() % 65536; 1721 nocross = 1 << (rand() % 15); 1722 if (align <= phase) { 1723 phase = 0; 1724 } 1725 if (VMEM_CROSS_P(phase, phase + sz - 1, 1726 nocross)) { 1727 nocross = 0; 1728 } 1729 do { 1730 minaddr = rand() % 50000; 1731 maxaddr = 
rand() % 70000; 1732 } while (minaddr > maxaddr); 1733 printf("=== xalloc %" PRIu64 1734 " align=%" PRIu64 ", phase=%" PRIu64 1735 ", nocross=%" PRIu64 ", min=%" PRIu64 1736 ", max=%" PRIu64 "\n", 1737 (uint64_t)sz, 1738 (uint64_t)align, 1739 (uint64_t)phase, 1740 (uint64_t)nocross, 1741 (uint64_t)minaddr, 1742 (uint64_t)maxaddr); 1743 rc = vmem_xalloc(vm, sz, align, phase, nocross, 1744 minaddr, maxaddr, strat|VM_SLEEP, &p); 1745 } else { 1746 x = false; 1747 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz); 1748 rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p); 1749 } 1750 printf("-> %" PRIu64 "\n", (uint64_t)p); 1751 vmem_dump(vm, vmem_printf); 1752 if (rc != 0) { 1753 if (x) { 1754 continue; 1755 } 1756 break; 1757 } 1758 nreg++; 1759 reg = realloc(reg, sizeof(*reg) * nreg); 1760 r = ®[nreg - 1]; 1761 r->p = p; 1762 r->sz = sz; 1763 r->x = x; 1764 total += sz; 1765 nalloc++; 1766 } else if (nreg != 0) { 1767 /* free */ 1768 r = ®[rand() % nreg]; 1769 printf("=== free %" PRIu64 ", %" PRIu64 "\n", 1770 (uint64_t)r->p, (uint64_t)r->sz); 1771 if (r->x) { 1772 vmem_xfree(vm, r->p, r->sz); 1773 } else { 1774 vmem_free(vm, r->p, r->sz); 1775 } 1776 total -= r->sz; 1777 vmem_dump(vm, vmem_printf); 1778 *r = reg[nreg - 1]; 1779 nreg--; 1780 nfree++; 1781 } 1782 printf("total=%" PRIu64 "\n", (uint64_t)total); 1783 } 1784 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n", 1785 (uint64_t)total, nalloc, nfree); 1786 exit(EXIT_SUCCESS); 1787 } 1788 #endif /* defined(UNITTEST) */ 1789
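
/*
 * Illustrative worked example of the free list indexing and alignment
 * macros above (the numbers are made-up, not taken from this file):
 *
 * With vm_quantum_shift == 8 (quantum 256), a request of 0x1800 bytes is
 * 24 quanta.  SIZE2ORDER(24) == ilog2(24) == 4, so a free block of that
 * size is kept on freelist[4], which holds blocks of [16, 31] quanta.
 * For VM_INSTANTFIT, bt_freehead_toalloc() bumps the index to 5 because
 * ORDER2SIZE(4) == 16 != 24; every block on freelist[5] ([32, 63] quanta)
 * is large enough, so the first one found can be taken without a size
 * check.
 *
 * VMEM_ALIGNUP() rounds up to a power-of-2 boundary using two's-complement
 * arithmetic, e.g. VMEM_ALIGNUP(0x1234, 0x100) == -(-0x1234 & -0x100) ==
 * 0x1300.
 */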