/*	$NetBSD: subr_vmem.c,v 1.88 2014/02/17 20:40:06 para Exp $	*/

/*-
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 *
 * locking & the boundary tag pool:
 * -	A pool(9) is used for vmem boundary tags.
 * -	During a pool get call the global vmem_btag_refill_lock is taken,
 *	to serialize access to the allocation reserve, but no other
 *	vmem arena locks.
 * -	During pool_put calls no vmem mutexes are locked.
 * -	pool_drain doesn't hold the pool's mutex while releasing memory to
 *	its backing allocator, so it cannot interfere with any vmem mutexes.
 * -	The boundary tag pool is forced to put page headers into pool pages
 *	(PR_PHINPAGE) and not off page to avoid pool recursion.
 *	(due to sizeof(bt_t) it should be the case anyway)
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.88 2014/02/17 20:40:06 para Exp $");

#if defined(_KERNEL)
#include "opt_ddb.h"
#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/queue.h>
#include <sys/bitops.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/kernel.h>	/* hz */
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/vmem.h>
#include <sys/vmem_impl.h>
#include <sys/workqueue.h>
#include <sys/atomic.h>
#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_km.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_pdaemon.h>
#else /* defined(_KERNEL) */
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "../sys/vmem.h"
#include "../sys/vmem_impl.h"
#endif /* defined(_KERNEL) */


#if defined(_KERNEL)
#include <sys/evcnt.h>
#define VMEM_EVCNT_DEFINE(name) \
struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
    "vmem", #name); \
EVCNT_ATTACH_STATIC(vmem_evcnt_##name);
#define VMEM_EVCNT_INCR(ev)	vmem_evcnt_##ev.ev_count++
#define VMEM_EVCNT_DECR(ev)	vmem_evcnt_##ev.ev_count--

VMEM_EVCNT_DEFINE(static_bt_count)
VMEM_EVCNT_DEFINE(static_bt_inuse)

#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)

#else /* defined(_KERNEL) */

#define VMEM_EVCNT_INCR(ev)	/* nothing */
#define VMEM_EVCNT_DECR(ev)	/* nothing */

#define	VMEM_CONDVAR_INIT(vm, wchan)	/* nothing */
#define	VMEM_CONDVAR_DESTROY(vm)	/* nothing */
#define	VMEM_CONDVAR_WAIT(vm)		/* nothing */
#define	VMEM_CONDVAR_BROADCAST(vm)	/* nothing */

#define	UNITTEST
#define	KASSERT(a)		assert(a)
#define	mutex_init(a, b, c)	/* nothing */
#define	mutex_destroy(a)	/* nothing */
#define	mutex_enter(a)		/* nothing */
#define	mutex_tryenter(a)	true
#define	mutex_exit(a)		/* nothing */
#define	mutex_owned(a)		/* nothing */
#define	ASSERT_SLEEPABLE()	/* nothing */
#define	panic(...)		printf(__VA_ARGS__); abort()
#endif /* defined(_KERNEL) */
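/*
 * Illustrative sketch, not part of the original file: typical use of the
 * vmem(9) interface implemented below.  The arena name, base address,
 * length and quantum are invented for the example, and IPL_NONE assumes
 * the arena is never touched from interrupt context.
 *
 *	vmem_t *arena;
 *	vmem_addr_t va;
 *
 *	arena = vmem_create("example", 0x1000, 0x10000, 1,
 *	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
 *	if (arena == NULL)
 *		return ENOMEM;
 *	if (vmem_alloc(arena, 64, VM_INSTANTFIT | VM_SLEEP, &va) == 0) {
 *		... use [va, va + 64) ...
 *		vmem_free(arena, va, 64);
 *	}
 *	vmem_destroy(arena);
 */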
#if defined(VMEM_SANITY)
static void vmem_check(vmem_t *);
#else /* defined(VMEM_SANITY) */
#define vmem_check(vm)	/* nothing */
#endif /* defined(VMEM_SANITY) */

#define	VMEM_HASHSIZE_MIN	1	/* XXX */
#define	VMEM_HASHSIZE_MAX	65536	/* XXX */
#define	VMEM_HASHSIZE_INIT	1

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

#if defined(_KERNEL)
static bool vmem_bootstrapped = false;
static kmutex_t vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
#endif /* defined(_KERNEL) */

/* ---- misc */

#define	VMEM_LOCK(vm)		mutex_enter(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mutex_tryenter(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mutex_exit(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, ipl)	mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
#define	VMEM_LOCK_DESTROY(vm)	mutex_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	KASSERT(mutex_owned(&vm->vm_lock))

#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))

#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
#define	SIZE2ORDER(size)	((int)ilog2(size))

#if !defined(_KERNEL)
#define	xmalloc(sz, flags)	malloc(sz)
#define	xfree(p, sz)		free(p)
#define	bt_alloc(vm, flags)	malloc(sizeof(bt_t))
#define	bt_free(vm, bt)		free(bt)
#else /* defined(_KERNEL) */

#define	xmalloc(sz, flags) \
    kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
#define	xfree(p, sz)		kmem_free(p, sz);

/*
 * BT_RESERVE calculation:
 * we allocate memory for boundary tags with vmem, therefore we have
 * to keep a reserve of bts used to allocate memory for bts.
 * This reserve is 4 for each arena involved in allocating vmem's memory.
 * BT_MAXFREE: don't cache excessive counts of bts in arenas
 */
#define STATIC_BT_COUNT 200
#define BT_MINRESERVE 4
#define BT_MAXFREE 64

static struct vmem_btag static_bts[STATIC_BT_COUNT];
static int static_bt_count = STATIC_BT_COUNT;

static struct vmem kmem_va_meta_arena_store;
vmem_t *kmem_va_meta_arena;
static struct vmem kmem_meta_arena_store;
vmem_t *kmem_meta_arena = NULL;

static kmutex_t vmem_btag_refill_lock;
static kmutex_t vmem_btag_lock;
static LIST_HEAD(, vmem_btag) vmem_btag_freelist;
static size_t vmem_btag_freelist_count = 0;
static struct pool vmem_btag_pool;

/* ---- boundary tag */

static int bt_refill(vmem_t *vm, vm_flag_t flags);

static void *
pool_page_alloc_vmem_meta(struct pool *pp, int flags)
{
	const vm_flag_t vflags = (flags & PR_WAITOK) ? VM_SLEEP: VM_NOSLEEP;
	vmem_addr_t va;
	int ret;

	ret = vmem_alloc(kmem_meta_arena, pp->pr_alloc->pa_pagesz,
	    (vflags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va);

	return ret ?
NULL : (void *)va; 212 } 213 214 static void 215 pool_page_free_vmem_meta(struct pool *pp, void *v) 216 { 217 218 vmem_free(kmem_meta_arena, (vmem_addr_t)v, pp->pr_alloc->pa_pagesz); 219 } 220 221 /* allocator for vmem-pool metadata */ 222 struct pool_allocator pool_allocator_vmem_meta = { 223 .pa_alloc = pool_page_alloc_vmem_meta, 224 .pa_free = pool_page_free_vmem_meta, 225 .pa_pagesz = 0 226 }; 227 228 static int 229 bt_refill(vmem_t *vm, vm_flag_t flags) 230 { 231 bt_t *bt; 232 233 VMEM_LOCK(vm); 234 if (vm->vm_nfreetags > BT_MINRESERVE) { 235 VMEM_UNLOCK(vm); 236 return 0; 237 } 238 239 mutex_enter(&vmem_btag_lock); 240 while (!LIST_EMPTY(&vmem_btag_freelist) && 241 vm->vm_nfreetags <= BT_MINRESERVE) { 242 bt = LIST_FIRST(&vmem_btag_freelist); 243 LIST_REMOVE(bt, bt_freelist); 244 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 245 vm->vm_nfreetags++; 246 vmem_btag_freelist_count--; 247 VMEM_EVCNT_INCR(static_bt_inuse); 248 } 249 mutex_exit(&vmem_btag_lock); 250 251 while (vm->vm_nfreetags <= BT_MINRESERVE) { 252 VMEM_UNLOCK(vm); 253 mutex_enter(&vmem_btag_refill_lock); 254 bt = pool_get(&vmem_btag_pool, 255 (flags & VM_SLEEP) ? PR_WAITOK: PR_NOWAIT); 256 mutex_exit(&vmem_btag_refill_lock); 257 VMEM_LOCK(vm); 258 if (bt == NULL && (flags & VM_SLEEP) == 0) 259 break; 260 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 261 vm->vm_nfreetags++; 262 } 263 264 VMEM_UNLOCK(vm); 265 266 if (vm->vm_nfreetags == 0) { 267 return ENOMEM; 268 } 269 270 271 if (kmem_meta_arena != NULL) { 272 bt_refill(kmem_arena, (flags & ~VM_FITMASK) 273 | VM_INSTANTFIT | VM_POPULATING); 274 bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK) 275 | VM_INSTANTFIT | VM_POPULATING); 276 bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK) 277 | VM_INSTANTFIT | VM_POPULATING); 278 } 279 280 return 0; 281 } 282 283 static bt_t * 284 bt_alloc(vmem_t *vm, vm_flag_t flags) 285 { 286 bt_t *bt; 287 VMEM_LOCK(vm); 288 while (vm->vm_nfreetags <= BT_MINRESERVE && (flags & VM_POPULATING) == 0) { 289 VMEM_UNLOCK(vm); 290 if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) { 291 return NULL; 292 } 293 VMEM_LOCK(vm); 294 } 295 bt = LIST_FIRST(&vm->vm_freetags); 296 LIST_REMOVE(bt, bt_freelist); 297 vm->vm_nfreetags--; 298 VMEM_UNLOCK(vm); 299 300 return bt; 301 } 302 303 static void 304 bt_free(vmem_t *vm, bt_t *bt) 305 { 306 307 VMEM_LOCK(vm); 308 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 309 vm->vm_nfreetags++; 310 VMEM_UNLOCK(vm); 311 } 312 313 static void 314 bt_freetrim(vmem_t *vm, int freelimit) 315 { 316 bt_t *t; 317 LIST_HEAD(, vmem_btag) tofree; 318 319 LIST_INIT(&tofree); 320 321 VMEM_LOCK(vm); 322 while (vm->vm_nfreetags > freelimit) { 323 bt_t *bt = LIST_FIRST(&vm->vm_freetags); 324 LIST_REMOVE(bt, bt_freelist); 325 vm->vm_nfreetags--; 326 if (bt >= static_bts 327 && bt < static_bts + sizeof(static_bts)) { 328 mutex_enter(&vmem_btag_lock); 329 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 330 vmem_btag_freelist_count++; 331 mutex_exit(&vmem_btag_lock); 332 VMEM_EVCNT_DECR(static_bt_inuse); 333 } else { 334 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 335 } 336 } 337 338 VMEM_UNLOCK(vm); 339 while (!LIST_EMPTY(&tofree)) { 340 t = LIST_FIRST(&tofree); 341 LIST_REMOVE(t, bt_freelist); 342 pool_put(&vmem_btag_pool, t); 343 } 344 } 345 #endif /* defined(_KERNEL) */ 346 347 /* 348 * freelist[0] ... [1, 1] 349 * freelist[1] ... [2, 3] 350 * freelist[2] ... [4, 7] 351 * freelist[3] ... [8, 15] 352 * : 353 * freelist[n] ... 
[(1 << n), (1 << (n + 1)) - 1] 354 * : 355 */ 356 357 static struct vmem_freelist * 358 bt_freehead_tofree(vmem_t *vm, vmem_size_t size) 359 { 360 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 361 const int idx = SIZE2ORDER(qsize); 362 363 KASSERT(size != 0 && qsize != 0); 364 KASSERT((size & vm->vm_quantum_mask) == 0); 365 KASSERT(idx >= 0); 366 KASSERT(idx < VMEM_MAXORDER); 367 368 return &vm->vm_freelist[idx]; 369 } 370 371 /* 372 * bt_freehead_toalloc: return the freelist for the given size and allocation 373 * strategy. 374 * 375 * for VM_INSTANTFIT, return the list in which any blocks are large enough 376 * for the requested size. otherwise, return the list which can have blocks 377 * large enough for the requested size. 378 */ 379 380 static struct vmem_freelist * 381 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat) 382 { 383 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 384 int idx = SIZE2ORDER(qsize); 385 386 KASSERT(size != 0 && qsize != 0); 387 KASSERT((size & vm->vm_quantum_mask) == 0); 388 389 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) { 390 idx++; 391 /* check too large request? */ 392 } 393 KASSERT(idx >= 0); 394 KASSERT(idx < VMEM_MAXORDER); 395 396 return &vm->vm_freelist[idx]; 397 } 398 399 /* ---- boundary tag hash */ 400 401 static struct vmem_hashlist * 402 bt_hashhead(vmem_t *vm, vmem_addr_t addr) 403 { 404 struct vmem_hashlist *list; 405 unsigned int hash; 406 407 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT); 408 list = &vm->vm_hashlist[hash % vm->vm_hashsize]; 409 410 return list; 411 } 412 413 static bt_t * 414 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr) 415 { 416 struct vmem_hashlist *list; 417 bt_t *bt; 418 419 list = bt_hashhead(vm, addr); 420 LIST_FOREACH(bt, list, bt_hashlist) { 421 if (bt->bt_start == addr) { 422 break; 423 } 424 } 425 426 return bt; 427 } 428 429 static void 430 bt_rembusy(vmem_t *vm, bt_t *bt) 431 { 432 433 KASSERT(vm->vm_nbusytag > 0); 434 vm->vm_inuse -= bt->bt_size; 435 vm->vm_nbusytag--; 436 LIST_REMOVE(bt, bt_hashlist); 437 } 438 439 static void 440 bt_insbusy(vmem_t *vm, bt_t *bt) 441 { 442 struct vmem_hashlist *list; 443 444 KASSERT(bt->bt_type == BT_TYPE_BUSY); 445 446 list = bt_hashhead(vm, bt->bt_start); 447 LIST_INSERT_HEAD(list, bt, bt_hashlist); 448 vm->vm_nbusytag++; 449 vm->vm_inuse += bt->bt_size; 450 } 451 452 /* ---- boundary tag list */ 453 454 static void 455 bt_remseg(vmem_t *vm, bt_t *bt) 456 { 457 458 TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist); 459 } 460 461 static void 462 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev) 463 { 464 465 TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist); 466 } 467 468 static void 469 bt_insseg_tail(vmem_t *vm, bt_t *bt) 470 { 471 472 TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist); 473 } 474 475 static void 476 bt_remfree(vmem_t *vm, bt_t *bt) 477 { 478 479 KASSERT(bt->bt_type == BT_TYPE_FREE); 480 481 LIST_REMOVE(bt, bt_freelist); 482 } 483 484 static void 485 bt_insfree(vmem_t *vm, bt_t *bt) 486 { 487 struct vmem_freelist *list; 488 489 list = bt_freehead_tofree(vm, bt->bt_size); 490 LIST_INSERT_HEAD(list, bt, bt_freelist); 491 } 492 493 /* ---- vmem internal functions */ 494 495 #if defined(QCACHE) 496 static inline vm_flag_t 497 prf_to_vmf(int prflags) 498 { 499 vm_flag_t vmflags; 500 501 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0); 502 if ((prflags & PR_WAITOK) != 0) { 503 vmflags = VM_SLEEP; 504 } else { 505 vmflags = VM_NOSLEEP; 506 } 507 return vmflags; 508 } 509 510 static inline int 
511 vmf_to_prf(vm_flag_t vmflags) 512 { 513 int prflags; 514 515 if ((vmflags & VM_SLEEP) != 0) { 516 prflags = PR_WAITOK; 517 } else { 518 prflags = PR_NOWAIT; 519 } 520 return prflags; 521 } 522 523 static size_t 524 qc_poolpage_size(size_t qcache_max) 525 { 526 int i; 527 528 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) { 529 /* nothing */ 530 } 531 return ORDER2SIZE(i); 532 } 533 534 static void * 535 qc_poolpage_alloc(struct pool *pool, int prflags) 536 { 537 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 538 vmem_t *vm = qc->qc_vmem; 539 vmem_addr_t addr; 540 541 if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz, 542 prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0) 543 return NULL; 544 return (void *)addr; 545 } 546 547 static void 548 qc_poolpage_free(struct pool *pool, void *addr) 549 { 550 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 551 vmem_t *vm = qc->qc_vmem; 552 553 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz); 554 } 555 556 static void 557 qc_init(vmem_t *vm, size_t qcache_max, int ipl) 558 { 559 qcache_t *prevqc; 560 struct pool_allocator *pa; 561 int qcache_idx_max; 562 int i; 563 564 KASSERT((qcache_max & vm->vm_quantum_mask) == 0); 565 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) { 566 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift; 567 } 568 vm->vm_qcache_max = qcache_max; 569 pa = &vm->vm_qcache_allocator; 570 memset(pa, 0, sizeof(*pa)); 571 pa->pa_alloc = qc_poolpage_alloc; 572 pa->pa_free = qc_poolpage_free; 573 pa->pa_pagesz = qc_poolpage_size(qcache_max); 574 575 qcache_idx_max = qcache_max >> vm->vm_quantum_shift; 576 prevqc = NULL; 577 for (i = qcache_idx_max; i > 0; i--) { 578 qcache_t *qc = &vm->vm_qcache_store[i - 1]; 579 size_t size = i << vm->vm_quantum_shift; 580 pool_cache_t pc; 581 582 qc->qc_vmem = vm; 583 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu", 584 vm->vm_name, size); 585 586 pc = pool_cache_init(size, 587 ORDER2SIZE(vm->vm_quantum_shift), 0, 588 PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */, 589 qc->qc_name, pa, ipl, NULL, NULL, NULL); 590 591 KASSERT(pc); 592 593 qc->qc_cache = pc; 594 KASSERT(qc->qc_cache != NULL); /* XXX */ 595 if (prevqc != NULL && 596 qc->qc_cache->pc_pool.pr_itemsperpage == 597 prevqc->qc_cache->pc_pool.pr_itemsperpage) { 598 pool_cache_destroy(qc->qc_cache); 599 vm->vm_qcache[i - 1] = prevqc; 600 continue; 601 } 602 qc->qc_cache->pc_pool.pr_qcache = qc; 603 vm->vm_qcache[i - 1] = qc; 604 prevqc = qc; 605 } 606 } 607 608 static void 609 qc_destroy(vmem_t *vm) 610 { 611 const qcache_t *prevqc; 612 int i; 613 int qcache_idx_max; 614 615 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift; 616 prevqc = NULL; 617 for (i = 0; i < qcache_idx_max; i++) { 618 qcache_t *qc = vm->vm_qcache[i]; 619 620 if (prevqc == qc) { 621 continue; 622 } 623 pool_cache_destroy(qc->qc_cache); 624 prevqc = qc; 625 } 626 } 627 #endif 628 629 #if defined(_KERNEL) 630 static void 631 vmem_bootstrap(void) 632 { 633 634 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM); 635 mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM); 636 mutex_init(&vmem_btag_refill_lock, MUTEX_DEFAULT, IPL_VM); 637 638 while (static_bt_count-- > 0) { 639 bt_t *bt = &static_bts[static_bt_count]; 640 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 641 VMEM_EVCNT_INCR(static_bt_count); 642 vmem_btag_freelist_count++; 643 } 644 vmem_bootstrapped = TRUE; 645 } 646 647 void 648 vmem_subsystem_init(vmem_t *vm) 649 { 650 651 kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va", 652 0, 0, PAGE_SIZE, vmem_alloc, 
vmem_free, vm, 653 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT, 654 IPL_VM); 655 656 kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta", 657 0, 0, PAGE_SIZE, 658 uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena, 659 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); 660 661 pool_init(&vmem_btag_pool, sizeof(bt_t), 0, 0, PR_PHINPAGE, 662 "vmembt", &pool_allocator_vmem_meta, IPL_VM); 663 } 664 #endif /* defined(_KERNEL) */ 665 666 static int 667 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags, 668 int spanbttype) 669 { 670 bt_t *btspan; 671 bt_t *btfree; 672 673 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 674 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 675 KASSERT(spanbttype == BT_TYPE_SPAN || 676 spanbttype == BT_TYPE_SPAN_STATIC); 677 678 btspan = bt_alloc(vm, flags); 679 if (btspan == NULL) { 680 return ENOMEM; 681 } 682 btfree = bt_alloc(vm, flags); 683 if (btfree == NULL) { 684 bt_free(vm, btspan); 685 return ENOMEM; 686 } 687 688 btspan->bt_type = spanbttype; 689 btspan->bt_start = addr; 690 btspan->bt_size = size; 691 692 btfree->bt_type = BT_TYPE_FREE; 693 btfree->bt_start = addr; 694 btfree->bt_size = size; 695 696 VMEM_LOCK(vm); 697 bt_insseg_tail(vm, btspan); 698 bt_insseg(vm, btfree, btspan); 699 bt_insfree(vm, btfree); 700 vm->vm_size += size; 701 VMEM_UNLOCK(vm); 702 703 return 0; 704 } 705 706 static void 707 vmem_destroy1(vmem_t *vm) 708 { 709 710 #if defined(QCACHE) 711 qc_destroy(vm); 712 #endif /* defined(QCACHE) */ 713 if (vm->vm_hashlist != NULL) { 714 int i; 715 716 for (i = 0; i < vm->vm_hashsize; i++) { 717 bt_t *bt; 718 719 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) { 720 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC); 721 bt_free(vm, bt); 722 } 723 } 724 if (vm->vm_hashlist != &vm->vm_hash0) { 725 xfree(vm->vm_hashlist, 726 sizeof(struct vmem_hashlist *) * vm->vm_hashsize); 727 } 728 } 729 730 bt_freetrim(vm, 0); 731 732 VMEM_CONDVAR_DESTROY(vm); 733 VMEM_LOCK_DESTROY(vm); 734 xfree(vm, sizeof(*vm)); 735 } 736 737 static int 738 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags) 739 { 740 vmem_addr_t addr; 741 int rc; 742 743 if (vm->vm_importfn == NULL) { 744 return EINVAL; 745 } 746 747 if (vm->vm_flags & VM_LARGEIMPORT) { 748 size *= 16; 749 } 750 751 if (vm->vm_flags & VM_XIMPORT) { 752 rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size, 753 &size, flags, &addr); 754 } else { 755 rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr); 756 } 757 if (rc) { 758 return ENOMEM; 759 } 760 761 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) { 762 (*vm->vm_releasefn)(vm->vm_arg, addr, size); 763 return ENOMEM; 764 } 765 766 return 0; 767 } 768 769 static int 770 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags) 771 { 772 bt_t *bt; 773 int i; 774 struct vmem_hashlist *newhashlist; 775 struct vmem_hashlist *oldhashlist; 776 size_t oldhashsize; 777 778 KASSERT(newhashsize > 0); 779 780 newhashlist = 781 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags); 782 if (newhashlist == NULL) { 783 return ENOMEM; 784 } 785 for (i = 0; i < newhashsize; i++) { 786 LIST_INIT(&newhashlist[i]); 787 } 788 789 if (!VMEM_TRYLOCK(vm)) { 790 xfree(newhashlist, 791 sizeof(struct vmem_hashlist *) * newhashsize); 792 return EBUSY; 793 } 794 oldhashlist = vm->vm_hashlist; 795 oldhashsize = vm->vm_hashsize; 796 vm->vm_hashlist = newhashlist; 797 vm->vm_hashsize = newhashsize; 798 if (oldhashlist == NULL) { 799 VMEM_UNLOCK(vm); 800 return 0; 801 } 802 for (i = 0; i < oldhashsize; i++) { 803 while ((bt = 
LIST_FIRST(&oldhashlist[i])) != NULL) { 804 bt_rembusy(vm, bt); /* XXX */ 805 bt_insbusy(vm, bt); 806 } 807 } 808 VMEM_UNLOCK(vm); 809 810 if (oldhashlist != &vm->vm_hash0) { 811 xfree(oldhashlist, 812 sizeof(struct vmem_hashlist *) * oldhashsize); 813 } 814 815 return 0; 816 } 817 818 /* 819 * vmem_fit: check if a bt can satisfy the given restrictions. 820 * 821 * it's a caller's responsibility to ensure the region is big enough 822 * before calling us. 823 */ 824 825 static int 826 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, 827 vmem_size_t phase, vmem_size_t nocross, 828 vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp) 829 { 830 vmem_addr_t start; 831 vmem_addr_t end; 832 833 KASSERT(size > 0); 834 KASSERT(bt->bt_size >= size); /* caller's responsibility */ 835 836 /* 837 * XXX assumption: vmem_addr_t and vmem_size_t are 838 * unsigned integer of the same size. 839 */ 840 841 start = bt->bt_start; 842 if (start < minaddr) { 843 start = minaddr; 844 } 845 end = BT_END(bt); 846 if (end > maxaddr) { 847 end = maxaddr; 848 } 849 if (start > end) { 850 return ENOMEM; 851 } 852 853 start = VMEM_ALIGNUP(start - phase, align) + phase; 854 if (start < bt->bt_start) { 855 start += align; 856 } 857 if (VMEM_CROSS_P(start, start + size - 1, nocross)) { 858 KASSERT(align < nocross); 859 start = VMEM_ALIGNUP(start - phase, nocross) + phase; 860 } 861 if (start <= end && end - start >= size - 1) { 862 KASSERT((start & (align - 1)) == phase); 863 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross)); 864 KASSERT(minaddr <= start); 865 KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr); 866 KASSERT(bt->bt_start <= start); 867 KASSERT(BT_END(bt) - start >= size - 1); 868 *addrp = start; 869 return 0; 870 } 871 return ENOMEM; 872 } 873 874 /* ---- vmem API */ 875 876 /* 877 * vmem_create_internal: creates a vmem arena. 878 */ 879 880 vmem_t * 881 vmem_init(vmem_t *vm, const char *name, 882 vmem_addr_t base, vmem_size_t size, vmem_size_t quantum, 883 vmem_import_t *importfn, vmem_release_t *releasefn, 884 vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 885 { 886 int i; 887 888 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 889 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 890 KASSERT(quantum > 0); 891 892 #if defined(_KERNEL) 893 /* XXX: SMP, we get called early... 
*/ 894 if (!vmem_bootstrapped) { 895 vmem_bootstrap(); 896 } 897 #endif /* defined(_KERNEL) */ 898 899 if (vm == NULL) { 900 vm = xmalloc(sizeof(*vm), flags); 901 } 902 if (vm == NULL) { 903 return NULL; 904 } 905 906 VMEM_CONDVAR_INIT(vm, "vmem"); 907 VMEM_LOCK_INIT(vm, ipl); 908 vm->vm_flags = flags; 909 vm->vm_nfreetags = 0; 910 LIST_INIT(&vm->vm_freetags); 911 strlcpy(vm->vm_name, name, sizeof(vm->vm_name)); 912 vm->vm_quantum_mask = quantum - 1; 913 vm->vm_quantum_shift = SIZE2ORDER(quantum); 914 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum); 915 vm->vm_importfn = importfn; 916 vm->vm_releasefn = releasefn; 917 vm->vm_arg = arg; 918 vm->vm_nbusytag = 0; 919 vm->vm_size = 0; 920 vm->vm_inuse = 0; 921 #if defined(QCACHE) 922 qc_init(vm, qcache_max, ipl); 923 #endif /* defined(QCACHE) */ 924 925 TAILQ_INIT(&vm->vm_seglist); 926 for (i = 0; i < VMEM_MAXORDER; i++) { 927 LIST_INIT(&vm->vm_freelist[i]); 928 } 929 memset(&vm->vm_hash0, 0, sizeof(struct vmem_hashlist)); 930 vm->vm_hashsize = 1; 931 vm->vm_hashlist = &vm->vm_hash0; 932 933 if (size != 0) { 934 if (vmem_add(vm, base, size, flags) != 0) { 935 vmem_destroy1(vm); 936 return NULL; 937 } 938 } 939 940 #if defined(_KERNEL) 941 if (flags & VM_BOOTSTRAP) { 942 bt_refill(vm, VM_NOSLEEP); 943 } 944 945 mutex_enter(&vmem_list_lock); 946 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist); 947 mutex_exit(&vmem_list_lock); 948 #endif /* defined(_KERNEL) */ 949 950 return vm; 951 } 952 953 954 955 /* 956 * vmem_create: create an arena. 957 * 958 * => must not be called from interrupt context. 959 */ 960 961 vmem_t * 962 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, 963 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, 964 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 965 { 966 967 KASSERT((flags & (VM_XIMPORT)) == 0); 968 969 return vmem_init(NULL, name, base, size, quantum, 970 importfn, releasefn, source, qcache_max, flags, ipl); 971 } 972 973 /* 974 * vmem_xcreate: create an arena takes alternative import func. 975 * 976 * => must not be called from interrupt context. 977 */ 978 979 vmem_t * 980 vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size, 981 vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn, 982 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 983 { 984 985 KASSERT((flags & (VM_XIMPORT)) == 0); 986 987 return vmem_init(NULL, name, base, size, quantum, 988 (vmem_import_t *)importfn, releasefn, source, 989 qcache_max, flags | VM_XIMPORT, ipl); 990 } 991 992 void 993 vmem_destroy(vmem_t *vm) 994 { 995 996 #if defined(_KERNEL) 997 mutex_enter(&vmem_list_lock); 998 LIST_REMOVE(vm, vm_alllist); 999 mutex_exit(&vmem_list_lock); 1000 #endif /* defined(_KERNEL) */ 1001 1002 vmem_destroy1(vm); 1003 } 1004 1005 vmem_size_t 1006 vmem_roundup_size(vmem_t *vm, vmem_size_t size) 1007 { 1008 1009 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask; 1010 } 1011 1012 /* 1013 * vmem_alloc: allocate resource from the arena. 
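 *
 * Illustrative example, not part of the original comment: "flags" must
 * contain exactly one of VM_SLEEP/VM_NOSLEEP plus a fit strategy, e.g.
 *
 *	vmem_addr_t va;
 *	int error;
 *
 *	error = vmem_alloc(vm, size, VM_INSTANTFIT | VM_NOSLEEP, &va);
 *
 * The request is rounded up to the arena's quantum; if the rounded size
 * is at most vm_qcache_max it is served from a per-size pool_cache (the
 * quantum cache) rather than from the arena's freelists.  "vm", "size",
 * "error" and "va" are placeholders for the example.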
1014 */ 1015 1016 int 1017 vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp) 1018 { 1019 const vm_flag_t strat __diagused = flags & VM_FITMASK; 1020 1021 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1022 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1023 1024 KASSERT(size > 0); 1025 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1026 if ((flags & VM_SLEEP) != 0) { 1027 ASSERT_SLEEPABLE(); 1028 } 1029 1030 #if defined(QCACHE) 1031 if (size <= vm->vm_qcache_max) { 1032 void *p; 1033 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1034 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1035 1036 p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags)); 1037 if (addrp != NULL) 1038 *addrp = (vmem_addr_t)p; 1039 return (p == NULL) ? ENOMEM : 0; 1040 } 1041 #endif /* defined(QCACHE) */ 1042 1043 return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, 1044 flags, addrp); 1045 } 1046 1047 int 1048 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align, 1049 const vmem_size_t phase, const vmem_size_t nocross, 1050 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags, 1051 vmem_addr_t *addrp) 1052 { 1053 struct vmem_freelist *list; 1054 struct vmem_freelist *first; 1055 struct vmem_freelist *end; 1056 bt_t *bt; 1057 bt_t *btnew; 1058 bt_t *btnew2; 1059 const vmem_size_t size = vmem_roundup_size(vm, size0); 1060 vm_flag_t strat = flags & VM_FITMASK; 1061 vmem_addr_t start; 1062 int rc; 1063 1064 KASSERT(size0 > 0); 1065 KASSERT(size > 0); 1066 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1067 if ((flags & VM_SLEEP) != 0) { 1068 ASSERT_SLEEPABLE(); 1069 } 1070 KASSERT((align & vm->vm_quantum_mask) == 0); 1071 KASSERT((align & (align - 1)) == 0); 1072 KASSERT((phase & vm->vm_quantum_mask) == 0); 1073 KASSERT((nocross & vm->vm_quantum_mask) == 0); 1074 KASSERT((nocross & (nocross - 1)) == 0); 1075 KASSERT((align == 0 && phase == 0) || phase < align); 1076 KASSERT(nocross == 0 || nocross >= size); 1077 KASSERT(minaddr <= maxaddr); 1078 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross)); 1079 1080 if (align == 0) { 1081 align = vm->vm_quantum_mask + 1; 1082 } 1083 1084 /* 1085 * allocate boundary tags before acquiring the vmem lock. 1086 */ 1087 btnew = bt_alloc(vm, flags); 1088 if (btnew == NULL) { 1089 return ENOMEM; 1090 } 1091 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */ 1092 if (btnew2 == NULL) { 1093 bt_free(vm, btnew); 1094 return ENOMEM; 1095 } 1096 1097 /* 1098 * choose a free block from which we allocate. 1099 */ 1100 retry_strat: 1101 first = bt_freehead_toalloc(vm, size, strat); 1102 end = &vm->vm_freelist[VMEM_MAXORDER]; 1103 retry: 1104 bt = NULL; 1105 VMEM_LOCK(vm); 1106 vmem_check(vm); 1107 if (strat == VM_INSTANTFIT) { 1108 /* 1109 * just choose the first block which satisfies our restrictions. 1110 * 1111 * note that we don't need to check the size of the blocks 1112 * because any blocks found on these list should be larger than 1113 * the given size. 1114 */ 1115 for (list = first; list < end; list++) { 1116 bt = LIST_FIRST(list); 1117 if (bt != NULL) { 1118 rc = vmem_fit(bt, size, align, phase, 1119 nocross, minaddr, maxaddr, &start); 1120 if (rc == 0) { 1121 goto gotit; 1122 } 1123 /* 1124 * don't bother to follow the bt_freelist link 1125 * here. the list can be very long and we are 1126 * told to run fast. blocks from the later free 1127 * lists are larger and have better chances to 1128 * satisfy our restrictions. 
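 *
 * worked example, added for illustration (not in the original
 * comment): with a quantum of 4KB (vm_quantum_shift == 12), a
 * 24KB request has qsize == 6.  freelist[n] holds free segments
 * of [2^n, 2^(n+1) - 1] quanta, so:
 *
 *	VM_BESTFIT    starts at freelist[SIZE2ORDER(6)] == freelist[2],
 *	              whose segments span 4..7 quanta and must have
 *	              their bt_size checked individually;
 *	VM_INSTANTFIT rounds up to freelist[3] (8..15 quanta), where
 *	              the first segment found is guaranteed to be
 *	              large enough.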
1129 */ 1130 } 1131 } 1132 } else { /* VM_BESTFIT */ 1133 /* 1134 * we assume that, for space efficiency, it's better to 1135 * allocate from a smaller block. thus we will start searching 1136 * from the lower-order list than VM_INSTANTFIT. 1137 * however, don't bother to find the smallest block in a free 1138 * list because the list can be very long. we can revisit it 1139 * if/when it turns out to be a problem. 1140 * 1141 * note that the 'first' list can contain blocks smaller than 1142 * the requested size. thus we need to check bt_size. 1143 */ 1144 for (list = first; list < end; list++) { 1145 LIST_FOREACH(bt, list, bt_freelist) { 1146 if (bt->bt_size >= size) { 1147 rc = vmem_fit(bt, size, align, phase, 1148 nocross, minaddr, maxaddr, &start); 1149 if (rc == 0) { 1150 goto gotit; 1151 } 1152 } 1153 } 1154 } 1155 } 1156 VMEM_UNLOCK(vm); 1157 #if 1 1158 if (strat == VM_INSTANTFIT) { 1159 strat = VM_BESTFIT; 1160 goto retry_strat; 1161 } 1162 #endif 1163 if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) { 1164 1165 /* 1166 * XXX should try to import a region large enough to 1167 * satisfy restrictions? 1168 */ 1169 1170 goto fail; 1171 } 1172 /* XXX eeek, minaddr & maxaddr not respected */ 1173 if (vmem_import(vm, size, flags) == 0) { 1174 goto retry; 1175 } 1176 /* XXX */ 1177 1178 if ((flags & VM_SLEEP) != 0) { 1179 #if defined(_KERNEL) && !defined(_RUMPKERNEL) 1180 mutex_spin_enter(&uvm_fpageqlock); 1181 uvm_kick_pdaemon(); 1182 mutex_spin_exit(&uvm_fpageqlock); 1183 #endif 1184 VMEM_LOCK(vm); 1185 VMEM_CONDVAR_WAIT(vm); 1186 VMEM_UNLOCK(vm); 1187 goto retry; 1188 } 1189 fail: 1190 bt_free(vm, btnew); 1191 bt_free(vm, btnew2); 1192 return ENOMEM; 1193 1194 gotit: 1195 KASSERT(bt->bt_type == BT_TYPE_FREE); 1196 KASSERT(bt->bt_size >= size); 1197 bt_remfree(vm, bt); 1198 vmem_check(vm); 1199 if (bt->bt_start != start) { 1200 btnew2->bt_type = BT_TYPE_FREE; 1201 btnew2->bt_start = bt->bt_start; 1202 btnew2->bt_size = start - bt->bt_start; 1203 bt->bt_start = start; 1204 bt->bt_size -= btnew2->bt_size; 1205 bt_insfree(vm, btnew2); 1206 bt_insseg(vm, btnew2, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); 1207 btnew2 = NULL; 1208 vmem_check(vm); 1209 } 1210 KASSERT(bt->bt_start == start); 1211 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) { 1212 /* split */ 1213 btnew->bt_type = BT_TYPE_BUSY; 1214 btnew->bt_start = bt->bt_start; 1215 btnew->bt_size = size; 1216 bt->bt_start = bt->bt_start + size; 1217 bt->bt_size -= size; 1218 bt_insfree(vm, bt); 1219 bt_insseg(vm, btnew, TAILQ_PREV(bt, vmem_seglist, bt_seglist)); 1220 bt_insbusy(vm, btnew); 1221 vmem_check(vm); 1222 VMEM_UNLOCK(vm); 1223 } else { 1224 bt->bt_type = BT_TYPE_BUSY; 1225 bt_insbusy(vm, bt); 1226 vmem_check(vm); 1227 VMEM_UNLOCK(vm); 1228 bt_free(vm, btnew); 1229 btnew = bt; 1230 } 1231 if (btnew2 != NULL) { 1232 bt_free(vm, btnew2); 1233 } 1234 KASSERT(btnew->bt_size >= size); 1235 btnew->bt_type = BT_TYPE_BUSY; 1236 1237 if (addrp != NULL) 1238 *addrp = btnew->bt_start; 1239 return 0; 1240 } 1241 1242 /* 1243 * vmem_free: free the resource to the arena. 
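 *
 * Note added for illustration, not part of the original comment:
 * addresses should be returned through the interface matching the one
 * they were obtained from.  vmem_free() may hand small allocations back
 * to the quantum caches, while vmem_xalloc() always carves from the
 * arena itself, so the usual pairing is vmem_alloc()/vmem_free() and
 * vmem_xalloc()/vmem_xfree():
 *
 *	vmem_xalloc(vm, sz, align, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
 *	    VM_BESTFIT | VM_SLEEP, &va);
 *	...
 *	vmem_xfree(vm, va, sz);
 *
 * "vm", "sz", "align" and "va" are placeholders for the example.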
1244 */ 1245 1246 void 1247 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1248 { 1249 1250 KASSERT(size > 0); 1251 1252 #if defined(QCACHE) 1253 if (size <= vm->vm_qcache_max) { 1254 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1255 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1256 1257 pool_cache_put(qc->qc_cache, (void *)addr); 1258 return; 1259 } 1260 #endif /* defined(QCACHE) */ 1261 1262 vmem_xfree(vm, addr, size); 1263 } 1264 1265 void 1266 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1267 { 1268 bt_t *bt; 1269 bt_t *t; 1270 LIST_HEAD(, vmem_btag) tofree; 1271 1272 LIST_INIT(&tofree); 1273 1274 KASSERT(size > 0); 1275 1276 VMEM_LOCK(vm); 1277 1278 bt = bt_lookupbusy(vm, addr); 1279 KASSERT(bt != NULL); 1280 KASSERT(bt->bt_start == addr); 1281 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) || 1282 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask); 1283 KASSERT(bt->bt_type == BT_TYPE_BUSY); 1284 bt_rembusy(vm, bt); 1285 bt->bt_type = BT_TYPE_FREE; 1286 1287 /* coalesce */ 1288 t = TAILQ_NEXT(bt, bt_seglist); 1289 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1290 KASSERT(BT_END(bt) < t->bt_start); /* YYY */ 1291 bt_remfree(vm, t); 1292 bt_remseg(vm, t); 1293 bt->bt_size += t->bt_size; 1294 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1295 } 1296 t = TAILQ_PREV(bt, vmem_seglist, bt_seglist); 1297 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1298 KASSERT(BT_END(t) < bt->bt_start); /* YYY */ 1299 bt_remfree(vm, t); 1300 bt_remseg(vm, t); 1301 bt->bt_size += t->bt_size; 1302 bt->bt_start = t->bt_start; 1303 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1304 } 1305 1306 t = TAILQ_PREV(bt, vmem_seglist, bt_seglist); 1307 KASSERT(t != NULL); 1308 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY); 1309 if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN && 1310 t->bt_size == bt->bt_size) { 1311 vmem_addr_t spanaddr; 1312 vmem_size_t spansize; 1313 1314 KASSERT(t->bt_start == bt->bt_start); 1315 spanaddr = bt->bt_start; 1316 spansize = bt->bt_size; 1317 bt_remseg(vm, bt); 1318 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 1319 bt_remseg(vm, t); 1320 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1321 vm->vm_size -= spansize; 1322 VMEM_CONDVAR_BROADCAST(vm); 1323 VMEM_UNLOCK(vm); 1324 (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize); 1325 } else { 1326 bt_insfree(vm, bt); 1327 VMEM_CONDVAR_BROADCAST(vm); 1328 VMEM_UNLOCK(vm); 1329 } 1330 1331 while (!LIST_EMPTY(&tofree)) { 1332 t = LIST_FIRST(&tofree); 1333 LIST_REMOVE(t, bt_freelist); 1334 bt_free(vm, t); 1335 } 1336 1337 bt_freetrim(vm, BT_MAXFREE); 1338 } 1339 1340 /* 1341 * vmem_add: 1342 * 1343 * => caller must ensure appropriate spl, 1344 * if the arena can be accessed from interrupt context. 
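 *
 * Illustrative example, not part of the original comment: an arena can
 * be created empty (size 0) and populated later as address ranges are
 * discovered; the range used below is invented.  Spans added this way
 * are tagged BT_TYPE_SPAN_STATIC and are never handed back to a release
 * function.
 *
 *	vm = vmem_create("ranges", 0, 0, 1, NULL, NULL, NULL, 0,
 *	    VM_SLEEP, IPL_NONE);
 *	error = vmem_add(vm, 0x10000, 0x8000, VM_SLEEP);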
1345 */ 1346 1347 int 1348 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags) 1349 { 1350 1351 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC); 1352 } 1353 1354 /* 1355 * vmem_size: information about arenas size 1356 * 1357 * => return free/allocated size in arena 1358 */ 1359 vmem_size_t 1360 vmem_size(vmem_t *vm, int typemask) 1361 { 1362 1363 switch (typemask) { 1364 case VMEM_ALLOC: 1365 return vm->vm_inuse; 1366 case VMEM_FREE: 1367 return vm->vm_size - vm->vm_inuse; 1368 case VMEM_FREE|VMEM_ALLOC: 1369 return vm->vm_size; 1370 default: 1371 panic("vmem_size"); 1372 } 1373 } 1374 1375 /* ---- rehash */ 1376 1377 #if defined(_KERNEL) 1378 static struct callout vmem_rehash_ch; 1379 static int vmem_rehash_interval; 1380 static struct workqueue *vmem_rehash_wq; 1381 static struct work vmem_rehash_wk; 1382 1383 static void 1384 vmem_rehash_all(struct work *wk, void *dummy) 1385 { 1386 vmem_t *vm; 1387 1388 KASSERT(wk == &vmem_rehash_wk); 1389 mutex_enter(&vmem_list_lock); 1390 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1391 size_t desired; 1392 size_t current; 1393 1394 if (!VMEM_TRYLOCK(vm)) { 1395 continue; 1396 } 1397 desired = vm->vm_nbusytag; 1398 current = vm->vm_hashsize; 1399 VMEM_UNLOCK(vm); 1400 1401 if (desired > VMEM_HASHSIZE_MAX) { 1402 desired = VMEM_HASHSIZE_MAX; 1403 } else if (desired < VMEM_HASHSIZE_MIN) { 1404 desired = VMEM_HASHSIZE_MIN; 1405 } 1406 if (desired > current * 2 || desired * 2 < current) { 1407 vmem_rehash(vm, desired, VM_NOSLEEP); 1408 } 1409 } 1410 mutex_exit(&vmem_list_lock); 1411 1412 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1413 } 1414 1415 static void 1416 vmem_rehash_all_kick(void *dummy) 1417 { 1418 1419 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL); 1420 } 1421 1422 void 1423 vmem_rehash_start(void) 1424 { 1425 int error; 1426 1427 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash", 1428 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE); 1429 if (error) { 1430 panic("%s: workqueue_create %d\n", __func__, error); 1431 } 1432 callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE); 1433 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL); 1434 1435 vmem_rehash_interval = hz * 10; 1436 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1437 } 1438 #endif /* defined(_KERNEL) */ 1439 1440 /* ---- debug */ 1441 1442 #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) 1443 1444 static void bt_dump(const bt_t *, void (*)(const char *, ...) 1445 __printflike(1, 2)); 1446 1447 static const char * 1448 bt_type_string(int type) 1449 { 1450 static const char * const table[] = { 1451 [BT_TYPE_BUSY] = "busy", 1452 [BT_TYPE_FREE] = "free", 1453 [BT_TYPE_SPAN] = "span", 1454 [BT_TYPE_SPAN_STATIC] = "static span", 1455 }; 1456 1457 if (type >= __arraycount(table)) { 1458 return "BOGUS"; 1459 } 1460 return table[type]; 1461 } 1462 1463 static void 1464 bt_dump(const bt_t *bt, void (*pr)(const char *, ...)) 1465 { 1466 1467 (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n", 1468 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size, 1469 bt->bt_type, bt_type_string(bt->bt_type)); 1470 } 1471 1472 static void 1473 vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...) 
__printflike(1, 2)) 1474 { 1475 const bt_t *bt; 1476 int i; 1477 1478 (*pr)("vmem %p '%s'\n", vm, vm->vm_name); 1479 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1480 bt_dump(bt, pr); 1481 } 1482 1483 for (i = 0; i < VMEM_MAXORDER; i++) { 1484 const struct vmem_freelist *fl = &vm->vm_freelist[i]; 1485 1486 if (LIST_EMPTY(fl)) { 1487 continue; 1488 } 1489 1490 (*pr)("freelist[%d]\n", i); 1491 LIST_FOREACH(bt, fl, bt_freelist) { 1492 bt_dump(bt, pr); 1493 } 1494 } 1495 } 1496 1497 #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */ 1498 1499 #if defined(DDB) 1500 static bt_t * 1501 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr) 1502 { 1503 bt_t *bt; 1504 1505 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1506 if (BT_ISSPAN_P(bt)) { 1507 continue; 1508 } 1509 if (bt->bt_start <= addr && addr <= BT_END(bt)) { 1510 return bt; 1511 } 1512 } 1513 1514 return NULL; 1515 } 1516 1517 void 1518 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 1519 { 1520 vmem_t *vm; 1521 1522 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1523 bt_t *bt; 1524 1525 bt = vmem_whatis_lookup(vm, addr); 1526 if (bt == NULL) { 1527 continue; 1528 } 1529 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n", 1530 (void *)addr, (void *)bt->bt_start, 1531 (size_t)(addr - bt->bt_start), vm->vm_name, 1532 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free"); 1533 } 1534 } 1535 1536 void 1537 vmem_printall(const char *modif, void (*pr)(const char *, ...)) 1538 { 1539 const vmem_t *vm; 1540 1541 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1542 vmem_dump(vm, pr); 1543 } 1544 } 1545 1546 void 1547 vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...)) 1548 { 1549 const vmem_t *vm = (const void *)addr; 1550 1551 vmem_dump(vm, pr); 1552 } 1553 #endif /* defined(DDB) */ 1554 1555 #if defined(_KERNEL) 1556 #define vmem_printf printf 1557 #else 1558 #include <stdio.h> 1559 #include <stdarg.h> 1560 1561 static void 1562 vmem_printf(const char *fmt, ...) 
1563 { 1564 va_list ap; 1565 va_start(ap, fmt); 1566 vprintf(fmt, ap); 1567 va_end(ap); 1568 } 1569 #endif 1570 1571 #if defined(VMEM_SANITY) 1572 1573 static bool 1574 vmem_check_sanity(vmem_t *vm) 1575 { 1576 const bt_t *bt, *bt2; 1577 1578 KASSERT(vm != NULL); 1579 1580 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1581 if (bt->bt_start > BT_END(bt)) { 1582 printf("corrupted tag\n"); 1583 bt_dump(bt, vmem_printf); 1584 return false; 1585 } 1586 } 1587 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1588 TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1589 if (bt == bt2) { 1590 continue; 1591 } 1592 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1593 continue; 1594 } 1595 if (bt->bt_start <= BT_END(bt2) && 1596 bt2->bt_start <= BT_END(bt)) { 1597 printf("overwrapped tags\n"); 1598 bt_dump(bt, vmem_printf); 1599 bt_dump(bt2, vmem_printf); 1600 return false; 1601 } 1602 } 1603 } 1604 1605 return true; 1606 } 1607 1608 static void 1609 vmem_check(vmem_t *vm) 1610 { 1611 1612 if (!vmem_check_sanity(vm)) { 1613 panic("insanity vmem %p", vm); 1614 } 1615 } 1616 1617 #endif /* defined(VMEM_SANITY) */ 1618 1619 #if defined(UNITTEST) 1620 int 1621 main(void) 1622 { 1623 int rc; 1624 vmem_t *vm; 1625 vmem_addr_t p; 1626 struct reg { 1627 vmem_addr_t p; 1628 vmem_size_t sz; 1629 bool x; 1630 } *reg = NULL; 1631 int nreg = 0; 1632 int nalloc = 0; 1633 int nfree = 0; 1634 vmem_size_t total = 0; 1635 #if 1 1636 vm_flag_t strat = VM_INSTANTFIT; 1637 #else 1638 vm_flag_t strat = VM_BESTFIT; 1639 #endif 1640 1641 vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP, 1642 #ifdef _KERNEL 1643 IPL_NONE 1644 #else 1645 0 1646 #endif 1647 ); 1648 if (vm == NULL) { 1649 printf("vmem_create\n"); 1650 exit(EXIT_FAILURE); 1651 } 1652 vmem_dump(vm, vmem_printf); 1653 1654 rc = vmem_add(vm, 0, 50, VM_SLEEP); 1655 assert(rc == 0); 1656 rc = vmem_add(vm, 100, 200, VM_SLEEP); 1657 assert(rc == 0); 1658 rc = vmem_add(vm, 2000, 1, VM_SLEEP); 1659 assert(rc == 0); 1660 rc = vmem_add(vm, 40000, 65536, VM_SLEEP); 1661 assert(rc == 0); 1662 rc = vmem_add(vm, 10000, 10000, VM_SLEEP); 1663 assert(rc == 0); 1664 rc = vmem_add(vm, 500, 1000, VM_SLEEP); 1665 assert(rc == 0); 1666 rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP); 1667 assert(rc == 0); 1668 rc = vmem_xalloc(vm, 0x101, 0, 0, 0, 1669 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1670 assert(rc != 0); 1671 rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p); 1672 assert(rc == 0 && p == 0); 1673 vmem_xfree(vm, p, 50); 1674 rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p); 1675 assert(rc == 0 && p == 0); 1676 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1677 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p); 1678 assert(rc != 0); 1679 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1680 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p); 1681 assert(rc != 0); 1682 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1683 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1684 assert(rc == 0); 1685 vmem_dump(vm, vmem_printf); 1686 for (;;) { 1687 struct reg *r; 1688 int t = rand() % 100; 1689 1690 if (t > 45) { 1691 /* alloc */ 1692 vmem_size_t sz = rand() % 500 + 1; 1693 bool x; 1694 vmem_size_t align, phase, nocross; 1695 vmem_addr_t minaddr, maxaddr; 1696 1697 if (t > 70) { 1698 x = true; 1699 /* XXX */ 1700 align = 1 << (rand() % 15); 1701 phase = rand() % 65536; 1702 nocross = 1 << (rand() % 15); 1703 if (align <= phase) { 1704 phase = 0; 1705 } 1706 if (VMEM_CROSS_P(phase, phase + sz - 1, 1707 nocross)) { 1708 nocross = 0; 1709 } 1710 do { 1711 minaddr = rand() % 50000; 1712 maxaddr = 
rand() % 70000; 1713 } while (minaddr > maxaddr); 1714 printf("=== xalloc %" PRIu64 1715 " align=%" PRIu64 ", phase=%" PRIu64 1716 ", nocross=%" PRIu64 ", min=%" PRIu64 1717 ", max=%" PRIu64 "\n", 1718 (uint64_t)sz, 1719 (uint64_t)align, 1720 (uint64_t)phase, 1721 (uint64_t)nocross, 1722 (uint64_t)minaddr, 1723 (uint64_t)maxaddr); 1724 rc = vmem_xalloc(vm, sz, align, phase, nocross, 1725 minaddr, maxaddr, strat|VM_SLEEP, &p); 1726 } else { 1727 x = false; 1728 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz); 1729 rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p); 1730 } 1731 printf("-> %" PRIu64 "\n", (uint64_t)p); 1732 vmem_dump(vm, vmem_printf); 1733 if (rc != 0) { 1734 if (x) { 1735 continue; 1736 } 1737 break; 1738 } 1739 nreg++; 1740 reg = realloc(reg, sizeof(*reg) * nreg); 1741 r = ®[nreg - 1]; 1742 r->p = p; 1743 r->sz = sz; 1744 r->x = x; 1745 total += sz; 1746 nalloc++; 1747 } else if (nreg != 0) { 1748 /* free */ 1749 r = ®[rand() % nreg]; 1750 printf("=== free %" PRIu64 ", %" PRIu64 "\n", 1751 (uint64_t)r->p, (uint64_t)r->sz); 1752 if (r->x) { 1753 vmem_xfree(vm, r->p, r->sz); 1754 } else { 1755 vmem_free(vm, r->p, r->sz); 1756 } 1757 total -= r->sz; 1758 vmem_dump(vm, vmem_printf); 1759 *r = reg[nreg - 1]; 1760 nreg--; 1761 nfree++; 1762 } 1763 printf("total=%" PRIu64 "\n", (uint64_t)total); 1764 } 1765 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n", 1766 (uint64_t)total, nalloc, nfree); 1767 exit(EXIT_SUCCESS); 1768 } 1769 #endif /* defined(UNITTEST) */ 1770
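/*
 * Note added for illustration, not part of the original file: when this
 * file is built outside the kernel (!_KERNEL), UNITTEST is defined above
 * and main() compiles into a standalone random-test driver for the
 * allocator.  Something along the lines of
 *
 *	cc -g -o vmem_test subr_vmem.c
 *
 * run from a directory where the relative includes "../sys/vmem.h" and
 * "../sys/vmem_impl.h" resolve should build it; the exact invocation is
 * an assumption, not taken from the source.  One constraint the test
 * exercises is "nocross": VMEM_CROSS_P(a, b, bound) reports whether a
 * and b fall into different bound-aligned blocks.  For bound 0x1000,
 * addresses 0x0f00 and 0x10ff cross a boundary ((0x0f00 ^ 0x10ff) &
 * -0x1000 == 0x1000), while 0x1000 and 0x1fff do not.
 */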