/*	$NetBSD: subr_vmem.c,v 1.92 2014/04/02 18:09:10 para Exp $	*/

/*-
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 *
 * locking & the boundary tag pool:
 * -	A pool(9) is used for vmem boundary tags
 * -	During a pool get call the global vmem_btag_refill_lock is taken,
 *	to serialize access to the allocation reserve, but no other
 *	vmem arena locks.
 * -	During pool_put calls no vmem mutexes are locked.
 * -	pool_drain doesn't hold the pool's mutex while releasing memory to
 *	its backing, so there is no interference with any vmem mutexes.
 * -	The boundary tag pool is forced to put page headers into pool pages
 *	(PR_PHINPAGE) and not off page to avoid pool recursion.
 *	(due to sizeof(bt_t) it should be the case anyway)
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.92 2014/04/02 18:09:10 para Exp $");

#if defined(_KERNEL)
#include "opt_ddb.h"
#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/queue.h>
#include <sys/bitops.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/kernel.h>	/* hz */
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/vmem.h>
#include <sys/vmem_impl.h>
#include <sys/workqueue.h>
#include <sys/atomic.h>
#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_km.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_pdaemon.h>
#else /* defined(_KERNEL) */
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "../sys/vmem.h"
#include "../sys/vmem_impl.h"
#endif /* defined(_KERNEL) */


#if defined(_KERNEL)
#include <sys/evcnt.h>
#define VMEM_EVCNT_DEFINE(name) \
struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
    "vmem", #name); \
EVCNT_ATTACH_STATIC(vmem_evcnt_##name);
#define VMEM_EVCNT_INCR(ev)	vmem_evcnt_##ev.ev_count++
#define VMEM_EVCNT_DECR(ev)	vmem_evcnt_##ev.ev_count--

VMEM_EVCNT_DEFINE(static_bt_count)
VMEM_EVCNT_DEFINE(static_bt_inuse)

#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)

#else /* defined(_KERNEL) */

#define VMEM_EVCNT_INCR(ev)	/* nothing */
#define VMEM_EVCNT_DECR(ev)	/* nothing */

#define	VMEM_CONDVAR_INIT(vm, wchan)	/* nothing */
#define	VMEM_CONDVAR_DESTROY(vm)	/* nothing */
#define	VMEM_CONDVAR_WAIT(vm)		/* nothing */
#define	VMEM_CONDVAR_BROADCAST(vm)	/* nothing */

#define	UNITTEST
#define	KASSERT(a)		assert(a)
#define	mutex_init(a, b, c)	/* nothing */
#define	mutex_destroy(a)	/* nothing */
#define	mutex_enter(a)		/* nothing */
#define	mutex_tryenter(a)	true
#define	mutex_exit(a)		/* nothing */
#define	mutex_owned(a)		/* nothing */
#define	ASSERT_SLEEPABLE()	/* nothing */
#define	panic(...)		printf(__VA_ARGS__); abort()
#endif /* defined(_KERNEL) */

#if defined(VMEM_SANITY)
static void vmem_check(vmem_t *);
#else /* defined(VMEM_SANITY) */
#define vmem_check(vm)	/* nothing */
#endif /* defined(VMEM_SANITY) */

#define	VMEM_HASHSIZE_MIN	1	/* XXX */
#define	VMEM_HASHSIZE_MAX	65536	/* XXX */
#define	VMEM_HASHSIZE_INIT	1

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

#if defined(_KERNEL)
static bool vmem_bootstrapped = false;
static kmutex_t vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
#endif /* defined(_KERNEL) */

/* ---- misc */

#define	VMEM_LOCK(vm)		mutex_enter(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mutex_tryenter(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mutex_exit(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, ipl)	mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
#define	VMEM_LOCK_DESTROY(vm)	mutex_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	KASSERT(mutex_owned(&vm->vm_lock))

/* round addr up to the next multiple of align (align is a power of two) */
#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))

/* true if addr1 and addr2 fall into different boundary-sized blocks */
#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
#define	SIZE2ORDER(size)	((int)ilog2(size))

#if !defined(_KERNEL)
#define	xmalloc(sz, flags)	malloc(sz)
#define	xfree(p, sz)		free(p)
#define	bt_alloc(vm, flags)	malloc(sizeof(bt_t))
#define	bt_free(vm, bt)		free(bt)
#else /* defined(_KERNEL) */

#define	xmalloc(sz, flags) \
    kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
#define	xfree(p, sz)		kmem_free(p, sz);

/*
 * BT_RESERVE calculation:
 * we allocate memory for boundary tags with vmem; therefore we have
 * to keep a reserve of bts used to allocate memory for bts.
 * This reserve is 4 for each arena involved in allocating vmem's memory.
 * BT_MAXFREE: don't cache excessive counts of bts in arenas
 */
#define STATIC_BT_COUNT 200
#define BT_MINRESERVE 4
#define BT_MAXFREE 64

static struct vmem_btag static_bts[STATIC_BT_COUNT];
static int static_bt_count = STATIC_BT_COUNT;

static struct vmem kmem_va_meta_arena_store;
vmem_t *kmem_va_meta_arena;
static struct vmem kmem_meta_arena_store;
vmem_t *kmem_meta_arena = NULL;

static kmutex_t vmem_btag_refill_lock;
static kmutex_t vmem_btag_lock;
static LIST_HEAD(, vmem_btag) vmem_btag_freelist;
static size_t vmem_btag_freelist_count = 0;
static struct pool vmem_btag_pool;

/* ---- boundary tag */

static int bt_refill(vmem_t *vm, vm_flag_t flags);

static void *
pool_page_alloc_vmem_meta(struct pool *pp, int flags)
{
	const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP;
	vmem_addr_t va;
	int ret;

	ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz,
	    (vflags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va);

	return ret ? NULL : (void *)va;
}
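
/*
 * Illustrative sketch (not part of the allocator, compiled out): concrete
 * values for the helper macros above, assuming power-of-two "align" and
 * "boundary" arguments as the macros require.
 */
#if 0
static void
vmem_macro_examples(void)
{

	/* VMEM_ALIGNUP rounds up to the next multiple of align. */
	KASSERT(VMEM_ALIGNUP(0x1001, 0x1000) == 0x2000);
	KASSERT(VMEM_ALIGNUP(0x2000, 0x1000) == 0x2000);

	/*
	 * VMEM_CROSS_P is true when the two addresses fall into different
	 * boundary-sized blocks, i.e. the region [addr1, addr2] crosses a
	 * multiple of "boundary".
	 */
	KASSERT(VMEM_CROSS_P(0x0ff0, 0x1010, 0x1000));
	KASSERT(!VMEM_CROSS_P(0x1000, 0x1fff, 0x1000));

	/* ORDER2SIZE/SIZE2ORDER convert between a power of two and its log2. */
	KASSERT(ORDER2SIZE(3) == 8);
	KASSERT(SIZE2ORDER(8) == 3);
	KASSERT(SIZE2ORDER(9) == 3);	/* ilog2 truncates */
}
#endif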

static void
pool_page_free_vmem_meta(struct pool *pp, void *v)
{

	vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz);
}

/* allocator for vmem-pool metadata */
struct pool_allocator pool_allocator_vmem_meta = {
	.pa_alloc = pool_page_alloc_vmem_meta,
	.pa_free = pool_page_free_vmem_meta,
	.pa_pagesz = 0
};

static int
bt_refill(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;

	KASSERT(flags & VM_NOSLEEP);

	VMEM_LOCK(vm);
	if (vm->vm_nfreetags > BT_MINRESERVE) {
		VMEM_UNLOCK(vm);
		return 0;
	}

	mutex_enter(&vmem_btag_lock);
	while (!LIST_EMPTY(&vmem_btag_freelist) &&
	    vm->vm_nfreetags <= BT_MINRESERVE) {
		bt = LIST_FIRST(&vmem_btag_freelist);
		LIST_REMOVE(bt, bt_freelist);
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
		vmem_btag_freelist_count--;
		VMEM_EVCNT_INCR(static_bt_inuse);
	}
	mutex_exit(&vmem_btag_lock);

	while (vm->vm_nfreetags <= BT_MINRESERVE) {
		VMEM_UNLOCK(vm);
		mutex_enter(&vmem_btag_refill_lock);
		bt = pool_get(&vmem_btag_pool, PR_NOWAIT);
		mutex_exit(&vmem_btag_refill_lock);
		VMEM_LOCK(vm);
		if (bt == NULL)
			break;
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
	}

	if (vm->vm_nfreetags <= BT_MINRESERVE) {
		VMEM_UNLOCK(vm);
		return ENOMEM;
	}

	VMEM_UNLOCK(vm);

	if (kmem_meta_arena != NULL) {
		bt_refill(kmem_arena, (flags & ~VM_FITMASK)
		    | VM_INSTANTFIT | VM_POPULATING);
		bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK)
		    | VM_INSTANTFIT | VM_POPULATING);
		bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK)
		    | VM_INSTANTFIT | VM_POPULATING);
	}

	return 0;
}

static bt_t *
bt_alloc(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;
	VMEM_LOCK(vm);
	while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) {
		VMEM_UNLOCK(vm);
		if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) {
			return NULL;
		}
		VMEM_LOCK(vm);
	}
	bt = LIST_FIRST(&vm->vm_freetags);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;
	VMEM_UNLOCK(vm);

	return bt;
}

static void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_LOCK(vm);
	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
	VMEM_UNLOCK(vm);
}

static void
bt_freetrim(vmem_t *vm, int freelimit)
{
	bt_t *t;
	LIST_HEAD(, vmem_btag) tofree;

	LIST_INIT(&tofree);

	VMEM_LOCK(vm);
	while (vm->vm_nfreetags > freelimit) {
		bt_t *bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		if (bt >= static_bts
		    && bt < &static_bts[STATIC_BT_COUNT]) {
			mutex_enter(&vmem_btag_lock);
			LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
			vmem_btag_freelist_count++;
			mutex_exit(&vmem_btag_lock);
			VMEM_EVCNT_DECR(static_bt_inuse);
		} else {
			LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
		}
	}

	VMEM_UNLOCK(vm);
	while (!LIST_EMPTY(&tofree)) {
		t = LIST_FIRST(&tofree);
		LIST_REMOVE(t, bt_freelist);
		pool_put(&vmem_btag_pool, t);
	}
}
#endif /* defined(_KERNEL) */

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */
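
/*
 * Worked example (illustrative, compiled out): with a quantum of 4 KiB
 * (vm_quantum_shift == 12), a 20 KiB allocation has qsize == 5, so
 * SIZE2ORDER(5) == 2 and it is freed onto freelist[2] ([4, 7] quanta).
 * For a VM_INSTANTFIT allocation the index is bumped to 3 because 5 is
 * not an exact power of two, which guarantees every block on that list
 * is large enough.  The arithmetic mirrors bt_freehead_tofree() and
 * bt_freehead_toalloc() below without needing a vmem_t; the quantum
 * value is an assumption for the example.
 */
#if 0
static void
vmem_freelist_index_example(void)
{
	const int quantum_shift = 12;		/* assumed 4 KiB quantum */
	const vmem_size_t size = 20 * 1024;	/* request, already rounded */
	const vmem_size_t qsize = size >> quantum_shift;
	int idx = SIZE2ORDER(qsize);

	KASSERT(qsize == 5);
	KASSERT(idx == 2);			/* list used by bt_insfree */
	if (ORDER2SIZE(idx) != qsize)
		idx++;				/* VM_INSTANTFIT search start */
	KASSERT(idx == 3);
}
#endif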

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	KASSERT(size != 0 && qsize != 0);
	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * for VM_INSTANTFIT, return the list in which any blocks are large enough
 * for the requested size.  otherwise, return the list which can have blocks
 * large enough for the requested size.
 */

static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx = SIZE2ORDER(qsize);

	KASSERT(size != 0 && qsize != 0);
	KASSERT((size & vm->vm_quantum_mask) == 0);

	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	KASSERT(vm->vm_nbusytag > 0);
	vm->vm_inuse -= bt->bt_size;
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
	vm->vm_inuse += bt->bt_size;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

#if defined(QCACHE)
static inline vm_flag_t
prf_to_vmf(int prflags)
{
	vm_flag_t vmflags;

	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
	if ((prflags & PR_WAITOK) != 0) {
		vmflags = VM_SLEEP;
	} else {
		vmflags = VM_NOSLEEP;
	}
	return vmflags;
}

static inline int
vmf_to_prf(vm_flag_t vmflags)
{
	int prflags;

	if ((vmflags & VM_SLEEP) != 0) {
		prflags = PR_WAITOK;
	} else {
		prflags = PR_NOWAIT;
	}
	return prflags;
}

static size_t
qc_poolpage_size(size_t qcache_max)
{
	int i;

	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
		/* nothing */
	}
	return ORDER2SIZE(i);
}

static void *
qc_poolpage_alloc(struct pool *pool, int prflags)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;
	vmem_addr_t addr;

	if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
	    prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0)
		return NULL;
	return (void *)addr;
}

static void
qc_poolpage_free(struct pool *pool, void *addr)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
}

static void
qc_init(vmem_t *vm, size_t qcache_max, int ipl)
{
	qcache_t *prevqc;
	struct pool_allocator *pa;
	int qcache_idx_max;
	int i;

	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
	}
	vm->vm_qcache_max = qcache_max;
	pa = &vm->vm_qcache_allocator;
	memset(pa, 0, sizeof(*pa));
	pa->pa_alloc = qc_poolpage_alloc;
	pa->pa_free = qc_poolpage_free;
	pa->pa_pagesz = qc_poolpage_size(qcache_max);

	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = qcache_idx_max; i > 0; i--) {
		qcache_t *qc = &vm->vm_qcache_store[i - 1];
		size_t size = i << vm->vm_quantum_shift;
		pool_cache_t pc;

		qc->qc_vmem = vm;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);

		pc = pool_cache_init(size,
		    ORDER2SIZE(vm->vm_quantum_shift), 0,
		    PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */,
		    qc->qc_name, pa, ipl, NULL, NULL, NULL);

		KASSERT(pc);

		qc->qc_cache = pc;
		KASSERT(qc->qc_cache != NULL);	/* XXX */
		if (prevqc != NULL &&
		    qc->qc_cache->pc_pool.pr_itemsperpage ==
		    prevqc->qc_cache->pc_pool.pr_itemsperpage) {
			pool_cache_destroy(qc->qc_cache);
			vm->vm_qcache[i - 1] = prevqc;
			continue;
		}
		qc->qc_cache->pc_pool.pr_qcache = qc;
		vm->vm_qcache[i - 1] = qc;
		prevqc = qc;
	}
}

static void
qc_destroy(vmem_t *vm)
{
	const qcache_t *prevqc;
	int i;
	int qcache_idx_max;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = 0; i < qcache_idx_max; i++) {
		qcache_t *qc = vm->vm_qcache[i];

		if (prevqc == qc) {
			continue;
		}
		pool_cache_destroy(qc->qc_cache);
		prevqc = qc;
	}
}
#endif
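
/*
 * Illustrative sketch (compiled out): which quantum cache serves a small
 * request.  This mirrors the index computation done by vmem_alloc() and
 * vmem_free() below for sizes up to vm_qcache_max; the arena and size are
 * whatever the caller has at hand.
 */
#if 0
static pool_cache_t
vmem_qcache_lookup_example(vmem_t *vm, vmem_size_t size)
{
	int qidx;

	KASSERT(size > 0 && size <= vm->vm_qcache_max);
	/* round up to a whole number of quanta, then index by quantum count */
	qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
	return vm->vm_qcache[qidx - 1]->qc_cache;
}
#endif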

#if defined(_KERNEL)
static void
vmem_bootstrap(void)
{

	mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM);
	mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM);
	mutex_init(&vmem_btag_refill_lock, MUTEX_DEFAULT, IPL_VM);

	while (static_bt_count-- > 0) {
		bt_t *bt = &static_bts[static_bt_count];
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
		VMEM_EVCNT_INCR(static_bt_count);
		vmem_btag_freelist_count++;
	}
	vmem_bootstrapped = TRUE;
}

void
vmem_subsystem_init(vmem_t *vm)
{

	kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va",
	    0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm,
	    0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT,
	    IPL_VM);

	kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta",
	    0, 0, PAGE_SIZE,
	    uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena,
	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);

	pool_init(&vmem_btag_pool, sizeof(bt_t), 0, 0, PR_PHINPAGE,
	    "vmembt", &pool_allocator_vmem_meta, IPL_VM);
}
#endif /* defined(_KERNEL) */

static int
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
    int spanbttype)
{
	bt_t *btspan;
	bt_t *btfree;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(spanbttype == BT_TYPE_SPAN ||
	    spanbttype == BT_TYPE_SPAN_STATIC);

	btspan = bt_alloc(vm, flags);
	if (btspan == NULL) {
		return ENOMEM;
	}
	btfree = bt_alloc(vm, flags);
	if (btfree == NULL) {
		bt_free(vm, btspan);
		return ENOMEM;
	}

	btspan->bt_type = spanbttype;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	VMEM_LOCK(vm);
	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
	vm->vm_size += size;
	VMEM_UNLOCK(vm);

	return 0;
}

static void
vmem_destroy1(vmem_t *vm)
{

#if defined(QCACHE)
	qc_destroy(vm);
#endif /* defined(QCACHE) */
	if (vm->vm_hashlist != NULL) {
		int i;

		for (i = 0; i < vm->vm_hashsize; i++) {
			bt_t *bt;

			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
				bt_free(vm, bt);
			}
		}
		if (vm->vm_hashlist != &vm->vm_hash0) {
			xfree(vm->vm_hashlist,
			    sizeof(struct vmem_hashlist *) * vm->vm_hashsize);
		}
	}

	bt_freetrim(vm, 0);

	VMEM_CONDVAR_DESTROY(vm);
	VMEM_LOCK_DESTROY(vm);
	xfree(vm, sizeof(*vm));
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
{
	vmem_addr_t addr;
	int rc;

	if (vm->vm_importfn == NULL) {
		return EINVAL;
	}

	if (vm->vm_flags & VM_LARGEIMPORT) {
		size *= 16;
	}

	if (vm->vm_flags & VM_XIMPORT) {
		rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size,
		    &size, flags, &addr);
	} else {
		rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
	}
	if (rc) {
		return ENOMEM;
	}

	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) {
		(*vm->vm_releasefn)(vm->vm_arg, addr, size);
		return ENOMEM;
	}

	return 0;
}
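
/*
 * Illustrative sketch (compiled out): vmem_import() in action.  A child
 * arena created with vmem_alloc/vmem_free as its import/release functions
 * and a parent arena as the argument pulls page-sized spans from the
 * parent on demand, the same way kmem_va_meta_arena is set up in
 * vmem_subsystem_init() above.  The arena name here is made up for the
 * example.
 */
#if 0
static void
vmem_import_example(vmem_t *parent)
{
	vmem_t *child;
	vmem_addr_t va;

	child = vmem_create("example-child", 0, 0, PAGE_SIZE,
	    vmem_alloc, vmem_free, parent, 0, VM_NOSLEEP, IPL_VM);
	if (child == NULL)
		return;

	/* no span yet; this forces vmem_import() to call vmem_alloc(parent) */
	if (vmem_alloc(child, PAGE_SIZE, VM_INSTANTFIT | VM_NOSLEEP, &va) == 0)
		vmem_free(child, va, PAGE_SIZE);

	vmem_destroy(child);
}
#endif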

static int
vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	size_t oldhashsize;

	KASSERT(newhashsize > 0);

	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
	if (newhashlist == NULL) {
		return ENOMEM;
	}
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	if (!VMEM_TRYLOCK(vm)) {
		xfree(newhashlist,
		    sizeof(struct vmem_hashlist *) * newhashsize);
		return EBUSY;
	}
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	if (oldhashlist != &vm->vm_hash0) {
		xfree(oldhashlist,
		    sizeof(struct vmem_hashlist *) * oldhashsize);
	}

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * it's a caller's responsibility to ensure the region is big enough
 * before calling us.
 */

static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross,
    vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(size > 0);
	KASSERT(bt->bt_size >= size); /* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr) {
		end = maxaddr;
	}
	if (start > end) {
		return ENOMEM;
	}

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		KASSERT(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start <= end && end - start >= size - 1) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(BT_END(bt) - start >= size - 1);
		*addrp = start;
		return 0;
	}
	return ENOMEM;
}
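
/*
 * Illustrative sketch (compiled out): a worked vmem_fit() call.  Given a
 * free block covering [0x10000, 0x17fff], a request for 0x1000 bytes with
 * align 0x2000, phase 0x100 and minaddr 0x12000 is placed at 0x12100, the
 * lowest address >= minaddr that equals phase modulo align.  All values
 * are made up for the example.
 */
#if 0
static void
vmem_fit_example(void)
{
	bt_t bt;
	vmem_addr_t start;

	memset(&bt, 0, sizeof(bt));
	bt.bt_type = BT_TYPE_FREE;
	bt.bt_start = 0x10000;
	bt.bt_size = 0x8000;

	if (vmem_fit(&bt, 0x1000, 0x2000, 0x100, 0,
	    0x12000, VMEM_ADDR_MAX, &start) == 0)
		KASSERT(start == 0x12100);
}
#endif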

/* ---- vmem API */

/*
 * vmem_init: creates a vmem arena.
 */

vmem_t *
vmem_init(vmem_t *vm, const char *name,
    vmem_addr_t base, vmem_size_t size, vmem_size_t quantum,
    vmem_import_t *importfn, vmem_release_t *releasefn,
    vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{
	int i;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(quantum > 0);

#if defined(_KERNEL)
	/* XXX: SMP, we get called early... */
	if (!vmem_bootstrapped) {
		vmem_bootstrap();
	}
#endif /* defined(_KERNEL) */

	if (vm == NULL) {
		vm = xmalloc(sizeof(*vm), flags);
	}
	if (vm == NULL) {
		return NULL;
	}

	VMEM_CONDVAR_INIT(vm, "vmem");
	VMEM_LOCK_INIT(vm, ipl);
	vm->vm_flags = flags;
	vm->vm_nfreetags = 0;
	LIST_INIT(&vm->vm_freetags);
	strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = SIZE2ORDER(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_importfn = importfn;
	vm->vm_releasefn = releasefn;
	vm->vm_arg = arg;
	vm->vm_nbusytag = 0;
	vm->vm_size = 0;
	vm->vm_inuse = 0;
#if defined(QCACHE)
	qc_init(vm, qcache_max, ipl);
#endif /* defined(QCACHE) */

	TAILQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	memset(&vm->vm_hash0, 0, sizeof(struct vmem_hashlist));
	vm->vm_hashsize = 1;
	vm->vm_hashlist = &vm->vm_hash0;

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) != 0) {
			vmem_destroy1(vm);
			return NULL;
		}
	}

#if defined(_KERNEL)
	if (flags & VM_BOOTSTRAP) {
		bt_refill(vm, VM_NOSLEEP);
	}

	mutex_enter(&vmem_list_lock);
	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	return vm;
}


/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_init(NULL, name, base, size, quantum,
	    importfn, releasefn, source, qcache_max, flags, ipl);
}

/*
 * vmem_xcreate: create an arena that takes an alternative import function.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_init(NULL, name, base, size, quantum,
	    (vmem_import_t *)importfn, releasefn, source,
	    qcache_max, flags | VM_XIMPORT, ipl);
}

void
vmem_destroy(vmem_t *vm)
{

#if defined(_KERNEL)
	mutex_enter(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	vmem_destroy1(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}
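
/*
 * Illustrative sketch (compiled out): the basic client-side sequence,
 * mirroring the unit test at the bottom of this file.  The arena name and
 * the address range are arbitrary values chosen for the example.
 */
#if 0
static void
vmem_usage_example(void)
{
	vmem_t *vm;
	vmem_addr_t p;

	/* arena with a quantum of 1 and no backing import function */
	vm = vmem_create("example", 0, 0, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP, IPL_NONE);
	if (vm == NULL)
		return;

	/* hand the arena a span to carve allocations from */
	if (vmem_add(vm, 0x1000, 0x1000, VM_SLEEP) != 0)
		goto out;

	if (vmem_alloc(vm, 64, VM_INSTANTFIT | VM_SLEEP, &p) == 0)
		vmem_free(vm, p, 64);
out:
	vmem_destroy(vm);
}
#endif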
/*
 * vmem_alloc: allocate resource from the arena.
 */

int
vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp)
{
	const vm_flag_t strat __diagused = flags & VM_FITMASK;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE();
	}

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		void *p;
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags));
		if (addrp != NULL)
			*addrp = (vmem_addr_t)p;
		return (p == NULL) ? ENOMEM : 0;
	}
#endif /* defined(QCACHE) */

	return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
	    flags, addrp);
}

int
vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross,
    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags,
    vmem_addr_t *addrp)
{
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	bt_t *bt;
	bt_t *btnew;
	bt_t *btnew2;
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	vm_flag_t strat = flags & VM_FITMASK;
	vmem_addr_t start;
	int rc;

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE();
	}
	KASSERT((align & vm->vm_quantum_mask) == 0);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((phase & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & (nocross - 1)) == 0);
	KASSERT((align == 0 && phase == 0) || phase < align);
	KASSERT(nocross == 0 || nocross >= size);
	KASSERT(minaddr <= maxaddr);
	KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	if (align == 0) {
		align = vm->vm_quantum_mask + 1;
	}

	/*
	 * allocate boundary tags before acquiring the vmem lock.
	 */
	btnew = bt_alloc(vm, flags);
	if (btnew == NULL) {
		return ENOMEM;
	}
	btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
	if (btnew2 == NULL) {
		bt_free(vm, btnew);
		return ENOMEM;
	}

	/*
	 * choose a free block from which we allocate.
	 */
retry_strat:
	first = bt_freehead_toalloc(vm, size, strat);
	end = &vm->vm_freelist[VMEM_MAXORDER];
retry:
	bt = NULL;
	VMEM_LOCK(vm);
	vmem_check(vm);
	if (strat == VM_INSTANTFIT) {
		/*
		 * just choose the first block which satisfies our restrictions.
		 *
		 * note that we don't need to check the size of the blocks
		 * because any blocks found on these lists should be larger
		 * than the given size.
		 */
		for (list = first; list < end; list++) {
			bt = LIST_FIRST(list);
			if (bt != NULL) {
				rc = vmem_fit(bt, size, align, phase,
				    nocross, minaddr, maxaddr, &start);
				if (rc == 0) {
					goto gotit;
				}
				/*
				 * don't bother to follow the bt_freelist link
				 * here.  the list can be very long and we are
				 * told to run fast.  blocks from the later
				 * free lists are larger and have better
				 * chances to satisfy our restrictions.
				 */
			}
		}
	} else { /* VM_BESTFIT */
		/*
		 * we assume that, for space efficiency, it's better to
		 * allocate from a smaller block.  thus we will start searching
		 * from a lower-order list than VM_INSTANTFIT does.
		 * however, don't bother to find the smallest block in a free
		 * list because the list can be very long.  we can revisit it
		 * if/when it turns out to be a problem.
		 *
		 * note that the 'first' list can contain blocks smaller than
		 * the requested size.  thus we need to check bt_size.
		 */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					rc = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr, &start);
					if (rc == 0) {
						goto gotit;
					}
				}
			}
		}
	}
	VMEM_UNLOCK(vm);
#if 1
	if (strat == VM_INSTANTFIT) {
		strat = VM_BESTFIT;
		goto retry_strat;
	}
#endif
	if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) {

		/*
		 * XXX should try to import a region large enough to
		 * satisfy restrictions?
		 */

		goto fail;
	}
	/* XXX eeek, minaddr & maxaddr not respected */
	if (vmem_import(vm, size, flags) == 0) {
		goto retry;
	}
	/* XXX */

	if ((flags & VM_SLEEP) != 0) {
#if defined(_KERNEL)
		mutex_spin_enter(&uvm_fpageqlock);
		uvm_kick_pdaemon();
		mutex_spin_exit(&uvm_fpageqlock);
#endif
		VMEM_LOCK(vm);
		VMEM_CONDVAR_WAIT(vm);
		VMEM_UNLOCK(vm);
		goto retry;
	}
fail:
	bt_free(vm, btnew);
	bt_free(vm, btnew2);
	return ENOMEM;

gotit:
	KASSERT(bt->bt_type == BT_TYPE_FREE);
	KASSERT(bt->bt_size >= size);
	bt_remfree(vm, bt);
	vmem_check(vm);
	if (bt->bt_start != start) {
		btnew2->bt_type = BT_TYPE_FREE;
		btnew2->bt_start = bt->bt_start;
		btnew2->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btnew2->bt_size;
		bt_insfree(vm, btnew2);
		bt_insseg(vm, btnew2, TAILQ_PREV(bt, vmem_seglist, bt_seglist));
		btnew2 = NULL;
		vmem_check(vm);
	}
	KASSERT(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew, TAILQ_PREV(bt, vmem_seglist, bt_seglist));
		bt_insbusy(vm, btnew);
		vmem_check(vm);
		VMEM_UNLOCK(vm);
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
		vmem_check(vm);
		VMEM_UNLOCK(vm);
		bt_free(vm, btnew);
		btnew = bt;
	}
	if (btnew2 != NULL) {
		bt_free(vm, btnew2);
	}
	KASSERT(btnew->bt_size >= size);
	btnew->bt_type = BT_TYPE_BUSY;

	if (addrp != NULL)
		*addrp = btnew->bt_start;
	return 0;
}
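
/*
 * Illustrative sketch (compiled out): a constrained vmem_xalloc() request,
 * similar to the xalloc cases in the unit test below.  The numbers are
 * arbitrary and assume the arena's quantum divides the alignment; they ask
 * for a 256-byte region aligned to 128 bytes that does not cross a
 * 4096-byte boundary and lies within [0x10000, 0x1ffff].
 */
#if 0
static int
vmem_xalloc_example(vmem_t *vm, vmem_addr_t *addrp)
{

	return vmem_xalloc(vm, 256, 128, 0, 4096,
	    0x10000, 0x1ffff, VM_INSTANTFIT | VM_NOSLEEP, addrp);
}
#endif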

/*
 * vmem_free: free the resource to the arena.
 */

void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	KASSERT(size > 0);

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		pool_cache_put(qc->qc_cache, (void *)addr);
		return;
	}
#endif /* defined(QCACHE) */

	vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;
	LIST_HEAD(, vmem_btag) tofree;

	LIST_INIT(&tofree);

	KASSERT(size > 0);

	VMEM_LOCK(vm);

	bt = bt_lookupbusy(vm, addr);
	KASSERT(bt != NULL);
	KASSERT(bt->bt_start == addr);
	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	KASSERT(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = TAILQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) < t->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
	}
	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) < bt->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
	}

	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	KASSERT(t != NULL);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
		bt_remseg(vm, t);
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
		vm->vm_size -= spansize;
		VMEM_CONDVAR_BROADCAST(vm);
		VMEM_UNLOCK(vm);
		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_CONDVAR_BROADCAST(vm);
		VMEM_UNLOCK(vm);
	}

	while (!LIST_EMPTY(&tofree)) {
		t = LIST_FIRST(&tofree);
		LIST_REMOVE(t, bt_freelist);
		bt_free(vm, t);
	}

	bt_freetrim(vm, BT_MAXFREE);
}

/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
}

/*
 * vmem_size: information about an arena's size
 *
 * => return the free/allocated size in the arena
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{

	switch (typemask) {
	case VMEM_ALLOC:
		return vm->vm_inuse;
	case VMEM_FREE:
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		return vm->vm_size;
	default:
		panic("vmem_size");
	}
}

/* ---- rehash */

#if defined(_KERNEL)
static struct callout vmem_rehash_ch;
static int vmem_rehash_interval;
static struct workqueue *vmem_rehash_wq;
static struct work vmem_rehash_wk;

static void
vmem_rehash_all(struct work *wk, void *dummy)
{
	vmem_t *vm;

	KASSERT(wk == &vmem_rehash_wk);
	mutex_enter(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		size_t desired;
		size_t current;

		if (!VMEM_TRYLOCK(vm)) {
			continue;
		}
		desired = vm->vm_nbusytag;
		current = vm->vm_hashsize;
		VMEM_UNLOCK(vm);

		if (desired > VMEM_HASHSIZE_MAX) {
			desired = VMEM_HASHSIZE_MAX;
		} else if (desired < VMEM_HASHSIZE_MIN) {
			desired = VMEM_HASHSIZE_MIN;
		}
		if (desired > current * 2 || desired * 2 < current) {
			vmem_rehash(vm, desired, VM_NOSLEEP);
		}
	}
	mutex_exit(&vmem_list_lock);

	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}

static void
vmem_rehash_all_kick(void *dummy)
{

	workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
}

void
vmem_rehash_start(void)
{
	int error;

	error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
	    vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE);
	if (error) {
		panic("%s: workqueue_create %d\n", __func__, error);
	}
	callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE);
	callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);

	vmem_rehash_interval = hz * 10;
	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}
#endif /* defined(_KERNEL) */

/* ---- debug */

#if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY)

static void bt_dump(const bt_t *, void (*)(const char *, ...)
    __printflike(1, 2));

static const char *
bt_type_string(int type)
{
	static const char * const table[] = {
		[BT_TYPE_BUSY] = "busy",
		[BT_TYPE_FREE] = "free",
		[BT_TYPE_SPAN] = "span",
		[BT_TYPE_SPAN_STATIC] = "static span",
	};

	if (type >= __arraycount(table)) {
		return "BOGUS";
	}
	return table[type];
}

static void
bt_dump(const bt_t *bt, void (*pr)(const char *, ...))
{

	(*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n",
	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
	    bt->bt_type, bt_type_string(bt->bt_type));
}

static void
vmem_dump(const vmem_t *vm, void (*pr)(const char *, ...)
    __printflike(1, 2))
{
	const bt_t *bt;
	int i;

	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt, pr);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		(*pr)("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt, pr);
		}
	}
}

#endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */

#if defined(DDB)
static bt_t *
vmem_whatis_lookup(vmem_t *vm, uintptr_t addr)
{
	bt_t *bt;

	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (BT_ISSPAN_P(bt)) {
			continue;
		}
		if (bt->bt_start <= addr && addr <= BT_END(bt)) {
			return bt;
		}
	}

	return NULL;
}

void
vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...))
{
	vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		bt_t *bt;

		bt = vmem_whatis_lookup(vm, addr);
		if (bt == NULL) {
			continue;
		}
		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
		    (void *)addr, (void *)bt->bt_start,
		    (size_t)(addr - bt->bt_start), vm->vm_name,
		    (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
	}
}

void
vmem_printall(const char *modif, void (*pr)(const char *, ...))
{
	const vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		vmem_dump(vm, pr);
	}
}

void
vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...))
{
	const vmem_t *vm = (const void *)addr;

	vmem_dump(vm, pr);
}
#endif /* defined(DDB) */

#if defined(_KERNEL)
#define vmem_printf printf
#else
#include <stdio.h>
#include <stdarg.h>

static void
vmem_printf(const char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
#endif

#if defined(VMEM_SANITY)

static bool
vmem_check_sanity(vmem_t *vm)
{
	const bt_t *bt, *bt2;

	KASSERT(vm != NULL);

	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (bt->bt_start > BT_END(bt)) {
			printf("corrupted tag\n");
			bt_dump(bt, vmem_printf);
			return false;
		}
	}
	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
			if (bt == bt2) {
				continue;
			}
			if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
				continue;
			}
			if (bt->bt_start <= BT_END(bt2) &&
			    bt2->bt_start <= BT_END(bt)) {
				printf("overwrapped tags\n");
				bt_dump(bt, vmem_printf);
				bt_dump(bt2, vmem_printf);
				return false;
			}
		}
	}

	return true;
}

static void
vmem_check(vmem_t *vm)
{

	if (!vmem_check_sanity(vm)) {
		panic("insanity vmem %p", vm);
	}
}

#endif /* defined(VMEM_SANITY) */

#if defined(UNITTEST)
int
main(void)
{
	int rc;
	vmem_t *vm;
	vmem_addr_t p;
	struct reg {
		vmem_addr_t p;
		vmem_size_t sz;
		bool x;
	} *reg = NULL;
	int nreg = 0;
	int nalloc = 0;
	int nfree = 0;
	vmem_size_t total = 0;
#if 1
	vm_flag_t strat = VM_INSTANTFIT;
#else
	vm_flag_t strat = VM_BESTFIT;
#endif

	vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP,
#ifdef _KERNEL
	    IPL_NONE
#else
	    0
#endif
	    );
	if (vm == NULL) {
		printf("vmem_create\n");
		exit(EXIT_FAILURE);
	}
	vmem_dump(vm, vmem_printf);

	rc = vmem_add(vm, 0, 50, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 100, 200, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 2000, 1, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 40000, 65536, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 10000, 10000, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 500, 1000, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_xalloc(vm, 0x101, 0, 0, 0,
	    0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p);
	assert(rc == 0 && p == 0);
	vmem_xfree(vm, p, 50);
	rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p);
	assert(rc == 0 && p == 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff01, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc == 0);
	vmem_dump(vm, vmem_printf);
	for (;;) {
		struct reg *r;
		int t = rand() % 100;

		if (t > 45) {
			/* alloc */
			vmem_size_t sz = rand() % 500 + 1;
			bool x;
			vmem_size_t align, phase, nocross;
			vmem_addr_t minaddr, maxaddr;

			if (t > 70) {
				x = true;
				/* XXX */
				align = 1 << (rand() % 15);
				phase = rand() % 65536;
				nocross = 1 << (rand() % 15);
				if (align <= phase) {
					phase = 0;
				}
				if (VMEM_CROSS_P(phase, phase + sz - 1,
				    nocross)) {
					nocross = 0;
				}
				do {
					minaddr = rand() % 50000;
					maxaddr = rand() % 70000;
				} while (minaddr > maxaddr);
				printf("=== xalloc %" PRIu64
				    " align=%" PRIu64 ", phase=%" PRIu64
				    ", nocross=%" PRIu64 ", min=%" PRIu64
				    ", max=%" PRIu64 "\n",
				    (uint64_t)sz,
				    (uint64_t)align,
				    (uint64_t)phase,
				    (uint64_t)nocross,
				    (uint64_t)minaddr,
				    (uint64_t)maxaddr);
				rc = vmem_xalloc(vm, sz, align, phase, nocross,
				    minaddr, maxaddr, strat|VM_SLEEP, &p);
			} else {
				x = false;
				printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
				rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p);
			}
			printf("-> %" PRIu64 "\n", (uint64_t)p);
			vmem_dump(vm, vmem_printf);
			if (rc != 0) {
				if (x) {
					continue;
				}
				break;
			}
			nreg++;
			reg = realloc(reg, sizeof(*reg) * nreg);
			r = &reg[nreg - 1];
			r->p = p;
			r->sz = sz;
			r->x = x;
			total += sz;
			nalloc++;
		} else if (nreg != 0) {
			/* free */
			r = &reg[rand() % nreg];
			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
			    (uint64_t)r->p, (uint64_t)r->sz);
			if (r->x) {
				vmem_xfree(vm, r->p, r->sz);
			} else {
				vmem_free(vm, r->p, r->sz);
			}
			total -= r->sz;
			vmem_dump(vm, vmem_printf);
			*r = reg[nreg - 1];
			nreg--;
			nfree++;
		}
		printf("total=%" PRIu64 "\n", (uint64_t)total);
	}
	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
	    (uint64_t)total, nalloc, nfree);
	exit(EXIT_SUCCESS);
}
#endif /* defined(UNITTEST) */