/*	$NetBSD: subr_vmem.c,v 1.82 2013/02/09 00:31:21 christos Exp $	*/

/*-
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.82 2013/02/09 00:31:21 christos Exp $");

#if defined(_KERNEL)
#include "opt_ddb.h"
#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/queue.h>
#include <sys/bitops.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/kernel.h>	/* hz */
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/vmem.h>
#include <sys/vmem_impl.h>
#include <sys/workqueue.h>
#include <sys/atomic.h>
#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_km.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_pdaemon.h>
#else /* defined(_KERNEL) */
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "../sys/vmem.h"
#include "../sys/vmem_impl.h"
#endif /* defined(_KERNEL) */


#if defined(_KERNEL)
#include <sys/evcnt.h>
#define VMEM_EVCNT_DEFINE(name) \
struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
    "vmemev", #name); \
EVCNT_ATTACH_STATIC(vmem_evcnt_##name);
#define VMEM_EVCNT_INCR(ev)	vmem_evcnt_##ev.ev_count++
#define VMEM_EVCNT_DECR(ev)	vmem_evcnt_##ev.ev_count--

VMEM_EVCNT_DEFINE(bt_pages)
VMEM_EVCNT_DEFINE(bt_count)
VMEM_EVCNT_DEFINE(bt_inuse)

#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)

#else /* defined(_KERNEL) */

#define VMEM_EVCNT_INCR(ev)	/* nothing */
#define VMEM_EVCNT_DECR(ev)	/* nothing */

#define	VMEM_CONDVAR_INIT(vm, wchan)	/* nothing */
#define	VMEM_CONDVAR_DESTROY(vm)	/* nothing */
#define	VMEM_CONDVAR_WAIT(vm)		/* nothing */
#define	VMEM_CONDVAR_BROADCAST(vm)	/* nothing */

#define	UNITTEST
#define	KASSERT(a)		assert(a)
#define	mutex_init(a, b, c)	/* nothing */
#define	mutex_destroy(a)	/* nothing */
#define	mutex_enter(a)		/* nothing */
#define	mutex_tryenter(a)	true
#define	mutex_exit(a)		/* nothing */
#define	mutex_owned(a)		/* nothing */
#define	ASSERT_SLEEPABLE()	/* nothing */
#define	panic(...)		printf(__VA_ARGS__); abort()
#endif /* defined(_KERNEL) */

#if defined(VMEM_SANITY)
static void vmem_check(vmem_t *);
#else /* defined(VMEM_SANITY) */
#define vmem_check(vm)	/* nothing */
#endif /* defined(VMEM_SANITY) */

#define	VMEM_HASHSIZE_MIN	1	/* XXX */
#define	VMEM_HASHSIZE_MAX	65536	/* XXX */
#define	VMEM_HASHSIZE_INIT	1

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

#if defined(_KERNEL)
static bool vmem_bootstrapped = false;
static kmutex_t vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
#endif /* defined(_KERNEL) */

/* ---- misc */

#define	VMEM_LOCK(vm)		mutex_enter(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mutex_tryenter(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mutex_exit(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, ipl)	mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
#define	VMEM_LOCK_DESTROY(vm)	mutex_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	KASSERT(mutex_owned(&vm->vm_lock))

#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))

#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
#define	SIZE2ORDER(size)	((int)ilog2(size))

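/*
 * note: VMEM_ALIGNUP and VMEM_CROSS_P rely on two's complement arithmetic
 * and expect "align"/"boundary" to be powers of two (boundary == 0 means
 * "no boundary", in which case VMEM_CROSS_P is always false).
 * a couple of worked examples:
 *
 *	VMEM_ALIGNUP(0x13, 0x10) == 0x20	(round up to multiple of align)
 *	VMEM_ALIGNUP(0x20, 0x10) == 0x20	(already aligned)
 *	VMEM_CROSS_P(0x0ff0, 0x1008, 0x1000)	-> true  (crosses 0x1000)
 *	VMEM_CROSS_P(0x1008, 0x1010, 0x1000)	-> false (same 0x1000 block)
 */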

#if !defined(_KERNEL)
#define xmalloc(sz, flags)	malloc(sz)
#define xfree(p, sz)		free(p)
#define bt_alloc(vm, flags)	malloc(sizeof(bt_t))
#define bt_free(vm, bt)		free(bt)
#else /* defined(_KERNEL) */

#define xmalloc(sz, flags) \
	kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
#define xfree(p, sz)		kmem_free(p, sz);

/*
 * BT_RESERVE calculation:
 * we allocate memory for boundary tags with vmem; therefore we have to keep
 * a reserve of bts used to allocate memory for bts.
 * This reserve is 4 for each arena involved in allocating vmem's memory.
 * BT_MAXFREE: don't cache excessive counts of bts in arenas.
 */
#define STATIC_BT_COUNT 200
#define BT_MINRESERVE 4
#define BT_MAXFREE 64

static struct vmem_btag static_bts[STATIC_BT_COUNT];
static int static_bt_count = STATIC_BT_COUNT;

static struct vmem kmem_va_meta_arena_store;
vmem_t *kmem_va_meta_arena;
static struct vmem kmem_meta_arena_store;
vmem_t *kmem_meta_arena;

static kmutex_t vmem_refill_lock;
static kmutex_t vmem_btag_lock;
static LIST_HEAD(, vmem_btag) vmem_btag_freelist;
static size_t vmem_btag_freelist_count = 0;
static size_t vmem_btag_count = STATIC_BT_COUNT;

/* ---- boundary tag */

#define BT_PER_PAGE (PAGE_SIZE / sizeof(bt_t))

static int bt_refill(vmem_t *vm, vm_flag_t flags);

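/*
 * bt_refillglobal: top up the global boundary tag freelist.
 *
 * if the global freelist is empty, allocate a page from kmem_meta_arena,
 * carve it into BT_PER_PAGE tags and put them on the freelist, then
 * refill the arenas involved in that allocation (kmem_arena,
 * kmem_va_meta_arena, kmem_meta_arena) so that the reserve described
 * above is maintained.
 */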
static int
bt_refillglobal(vm_flag_t flags)
{
	vmem_addr_t va;
	bt_t *btp;
	bt_t *bt;
	int i;

	mutex_enter(&vmem_refill_lock);

	mutex_enter(&vmem_btag_lock);
	if (vmem_btag_freelist_count > 0) {
		mutex_exit(&vmem_btag_lock);
		mutex_exit(&vmem_refill_lock);
		return 0;
	}
	mutex_exit(&vmem_btag_lock);

	if (vmem_alloc(kmem_meta_arena, PAGE_SIZE,
	    (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va) != 0) {
		mutex_exit(&vmem_refill_lock);
		return ENOMEM;
	}
	VMEM_EVCNT_INCR(bt_pages);

	mutex_enter(&vmem_btag_lock);
	btp = (void *) va;
	for (i = 0; i < (BT_PER_PAGE); i++) {
		bt = btp;
		memset(bt, 0, sizeof(*bt));
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt,
		    bt_freelist);
		vmem_btag_freelist_count++;
		vmem_btag_count++;
		VMEM_EVCNT_INCR(bt_count);
		btp++;
	}
	mutex_exit(&vmem_btag_lock);

	bt_refill(kmem_arena, (flags & ~VM_FITMASK)
	    | VM_INSTANTFIT | VM_POPULATING);
	bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK)
	    | VM_INSTANTFIT | VM_POPULATING);
	bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK)
	    | VM_INSTANTFIT | VM_POPULATING);

	mutex_exit(&vmem_refill_lock);

	return 0;
}

static int
bt_refill(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;

	if (!(flags & VM_POPULATING)) {
		bt_refillglobal(flags);
	}

	VMEM_LOCK(vm);
	mutex_enter(&vmem_btag_lock);
	while (!LIST_EMPTY(&vmem_btag_freelist) &&
	    vm->vm_nfreetags <= BT_MINRESERVE) {
		bt = LIST_FIRST(&vmem_btag_freelist);
		LIST_REMOVE(bt, bt_freelist);
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
		vmem_btag_freelist_count--;
	}
	mutex_exit(&vmem_btag_lock);

	if (vm->vm_nfreetags == 0) {
		VMEM_UNLOCK(vm);
		return ENOMEM;
	}
	VMEM_UNLOCK(vm);

	return 0;
}

static inline bt_t *
bt_alloc(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;
again:
	VMEM_LOCK(vm);
	if (vm->vm_nfreetags <= BT_MINRESERVE &&
	    (flags & VM_POPULATING) == 0) {
		VMEM_UNLOCK(vm);
		if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) {
			return NULL;
		}
		goto again;
	}
	bt = LIST_FIRST(&vm->vm_freetags);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;
	VMEM_UNLOCK(vm);
	VMEM_EVCNT_INCR(bt_inuse);

	return bt;
}

static inline void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_LOCK(vm);
	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
	while (vm->vm_nfreetags > BT_MAXFREE) {
		bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		mutex_enter(&vmem_btag_lock);
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
		vmem_btag_freelist_count++;
		mutex_exit(&vmem_btag_lock);
	}
	VMEM_UNLOCK(vm);
	VMEM_EVCNT_DECR(bt_inuse);
}

#endif /* defined(_KERNEL) */

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	KASSERT(size != 0 && qsize != 0);
	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * for VM_INSTANTFIT, return the list on which any block is large enough
 * for the requested size.  otherwise, return the list which can contain
 * blocks large enough for the requested size.
 */
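/*
 * e.g. for a request of 3 quanta (SIZE2ORDER(3) == 1):
 *	VM_BESTFIT starts at freelist[1], which holds blocks of [2, 3]
 *	quanta, so the blocks on it must be size-checked;
 *	VM_INSTANTFIT starts at freelist[2] ([4, 7] quanta), where the
 *	first block found is guaranteed to be large enough.
 */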

static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx = SIZE2ORDER(qsize);

	KASSERT(size != 0 && qsize != 0);
	KASSERT((size & vm->vm_quantum_mask) == 0);

	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	KASSERT(vm->vm_nbusytag > 0);
	vm->vm_inuse -= bt->bt_size;
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
	vm->vm_inuse += bt->bt_size;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

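/*
 * quantum cache (QCACHE): requests of up to vm_qcache_max (at most
 * VMEM_QCACHE_IDX_MAX quanta) are served from per-size pool_caches
 * layered on top of the arena, in the spirit of the quantum caching
 * described in the Bonwick paper referenced above.  the caches get their
 * backing pages from the arena itself via qc_poolpage_alloc/free.
 */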

#if defined(QCACHE)
static inline vm_flag_t
prf_to_vmf(int prflags)
{
	vm_flag_t vmflags;

	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
	if ((prflags & PR_WAITOK) != 0) {
		vmflags = VM_SLEEP;
	} else {
		vmflags = VM_NOSLEEP;
	}
	return vmflags;
}

static inline int
vmf_to_prf(vm_flag_t vmflags)
{
	int prflags;

	if ((vmflags & VM_SLEEP) != 0) {
		prflags = PR_WAITOK;
	} else {
		prflags = PR_NOWAIT;
	}
	return prflags;
}

static size_t
qc_poolpage_size(size_t qcache_max)
{
	int i;

	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
		/* nothing */
	}
	return ORDER2SIZE(i);
}

static void *
qc_poolpage_alloc(struct pool *pool, int prflags)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;
	vmem_addr_t addr;

	if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
	    prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0)
		return NULL;
	return (void *)addr;
}

static void
qc_poolpage_free(struct pool *pool, void *addr)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
}

static void
qc_init(vmem_t *vm, size_t qcache_max, int ipl)
{
	qcache_t *prevqc;
	struct pool_allocator *pa;
	int qcache_idx_max;
	int i;

	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
	}
	vm->vm_qcache_max = qcache_max;
	pa = &vm->vm_qcache_allocator;
	memset(pa, 0, sizeof(*pa));
	pa->pa_alloc = qc_poolpage_alloc;
	pa->pa_free = qc_poolpage_free;
	pa->pa_pagesz = qc_poolpage_size(qcache_max);

	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = qcache_idx_max; i > 0; i--) {
		qcache_t *qc = &vm->vm_qcache_store[i - 1];
		size_t size = i << vm->vm_quantum_shift;
		pool_cache_t pc;

		qc->qc_vmem = vm;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);

		pc = pool_cache_init(size,
		    ORDER2SIZE(vm->vm_quantum_shift), 0,
		    PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */,
		    qc->qc_name, pa, ipl, NULL, NULL, NULL);

		KASSERT(pc);

		qc->qc_cache = pc;
		KASSERT(qc->qc_cache != NULL);	/* XXX */
		if (prevqc != NULL &&
		    qc->qc_cache->pc_pool.pr_itemsperpage ==
		    prevqc->qc_cache->pc_pool.pr_itemsperpage) {
			pool_cache_destroy(qc->qc_cache);
			vm->vm_qcache[i - 1] = prevqc;
			continue;
		}
		qc->qc_cache->pc_pool.pr_qcache = qc;
		vm->vm_qcache[i - 1] = qc;
		prevqc = qc;
	}
}

static void
qc_destroy(vmem_t *vm)
{
	const qcache_t *prevqc;
	int i;
	int qcache_idx_max;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = 0; i < qcache_idx_max; i++) {
		qcache_t *qc = vm->vm_qcache[i];

		if (prevqc == qc) {
			continue;
		}
		pool_cache_destroy(qc->qc_cache);
		prevqc = qc;
	}
}
#endif

#if defined(_KERNEL)
static void
vmem_bootstrap(void)
{

	mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM);
	mutex_init(&vmem_refill_lock, MUTEX_DEFAULT, IPL_VM);
	mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM);

	while (static_bt_count-- > 0) {
		bt_t *bt = &static_bts[static_bt_count];
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
		VMEM_EVCNT_INCR(bt_count);
		vmem_btag_freelist_count++;
	}
	vmem_bootstrapped = TRUE;
}

void
vmem_subsystem_init(vmem_t *vm)
{

	kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va",
	    0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm,
	    0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT,
	    IPL_VM);

	kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta",
	    0, 0, PAGE_SIZE,
	    uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena,
	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
}
#endif /* defined(_KERNEL) */

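/*
 * vmem_add1: add the span [addr, addr + size) to the arena.
 *
 * allocates two boundary tags: one of spanbttype (BT_TYPE_SPAN or
 * BT_TYPE_SPAN_STATIC) describing the whole span, and one BT_TYPE_FREE
 * tag covering it, which is hooked onto the appropriate freelist.
 */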
static int
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
    int spanbttype)
{
	bt_t *btspan;
	bt_t *btfree;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(spanbttype == BT_TYPE_SPAN ||
	    spanbttype == BT_TYPE_SPAN_STATIC);

	btspan = bt_alloc(vm, flags);
	if (btspan == NULL) {
		return ENOMEM;
	}
	btfree = bt_alloc(vm, flags);
	if (btfree == NULL) {
		bt_free(vm, btspan);
		return ENOMEM;
	}

	btspan->bt_type = spanbttype;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	VMEM_LOCK(vm);
	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
	vm->vm_size += size;
	VMEM_UNLOCK(vm);

	return 0;
}

static void
vmem_destroy1(vmem_t *vm)
{

#if defined(QCACHE)
	qc_destroy(vm);
#endif /* defined(QCACHE) */
	if (vm->vm_hashlist != NULL) {
		int i;

		for (i = 0; i < vm->vm_hashsize; i++) {
			bt_t *bt;

			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
				bt_free(vm, bt);
			}
		}
		if (vm->vm_hashlist != &vm->vm_hash0) {
			xfree(vm->vm_hashlist,
			    sizeof(struct vmem_hashlist *) * vm->vm_hashsize);
		}
	}

	while (vm->vm_nfreetags > 0) {
		bt_t *bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		mutex_enter(&vmem_btag_lock);
#if defined (_KERNEL)
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
		vmem_btag_freelist_count++;
#endif /* defined(_KERNEL) */
		mutex_exit(&vmem_btag_lock);
	}

	VMEM_CONDVAR_DESTROY(vm);
	VMEM_LOCK_DESTROY(vm);
	xfree(vm, sizeof(*vm));
}

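/*
 * vmem_import: obtain a new span from the backing source.
 *
 * calls vm_importfn to allocate at least "size" from the source and adds
 * the result as a BT_TYPE_SPAN.  VM_LARGEIMPORT arenas import 16 times
 * the requested size (presumably to reduce the number of import calls);
 * VM_XIMPORT arenas use the extended import function, which may hand back
 * a region larger than requested.
 */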
static int
vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
{
	vmem_addr_t addr;
	int rc;

	if (vm->vm_importfn == NULL) {
		return EINVAL;
	}

	if (vm->vm_flags & VM_LARGEIMPORT) {
		size *= 16;
	}

	if (vm->vm_flags & VM_XIMPORT) {
		rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size,
		    &size, flags, &addr);
	} else {
		rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
	}
	if (rc) {
		return ENOMEM;
	}

	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) {
		(*vm->vm_releasefn)(vm->vm_arg, addr, size);
		return ENOMEM;
	}

	return 0;
}

static int
vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	size_t oldhashsize;

	KASSERT(newhashsize > 0);

	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
	if (newhashlist == NULL) {
		return ENOMEM;
	}
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	if (!VMEM_TRYLOCK(vm)) {
		xfree(newhashlist,
		    sizeof(struct vmem_hashlist *) * newhashsize);
		return EBUSY;
	}
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	if (oldhashlist != &vm->vm_hash0) {
		xfree(oldhashlist,
		    sizeof(struct vmem_hashlist *) * oldhashsize);
	}

	return 0;
}

/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * it's the caller's responsibility to ensure the region is big enough
 * before calling us.
 */
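/*
 * e.g. a request with align = 0x100 and phase = 0x10 asks for an address
 * of the form N * 0x100 + 0x10; with nocross = 0x1000 the resulting
 * [addr, addr + size - 1] range additionally must not straddle a 0x1000
 * boundary.  minaddr and maxaddr bound the usable addresses inclusively.
 */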

static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross,
    vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(size > 0);
	KASSERT(bt->bt_size >= size); /* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr) {
		end = maxaddr;
	}
	if (start > end) {
		return ENOMEM;
	}

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		KASSERT(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start <= end && end - start >= size - 1) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(BT_END(bt) - start >= size - 1);
		*addrp = start;
		return 0;
	}
	return ENOMEM;
}

/* ---- vmem API */

/*
 * vmem_init: creates a vmem arena.
 */

vmem_t *
vmem_init(vmem_t *vm, const char *name,
    vmem_addr_t base, vmem_size_t size, vmem_size_t quantum,
    vmem_import_t *importfn, vmem_release_t *releasefn,
    vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{
	int i;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(quantum > 0);

#if defined(_KERNEL)
	/* XXX: SMP, we get called early... */
	if (!vmem_bootstrapped) {
		vmem_bootstrap();
	}
#endif /* defined(_KERNEL) */

	if (vm == NULL) {
		vm = xmalloc(sizeof(*vm), flags);
	}
	if (vm == NULL) {
		return NULL;
	}

	VMEM_CONDVAR_INIT(vm, "vmem");
	VMEM_LOCK_INIT(vm, ipl);
	vm->vm_flags = flags;
	vm->vm_nfreetags = 0;
	LIST_INIT(&vm->vm_freetags);
	strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = SIZE2ORDER(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_importfn = importfn;
	vm->vm_releasefn = releasefn;
	vm->vm_arg = arg;
	vm->vm_nbusytag = 0;
	vm->vm_size = 0;
	vm->vm_inuse = 0;
#if defined(QCACHE)
	qc_init(vm, qcache_max, ipl);
#endif /* defined(QCACHE) */

	CIRCLEQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	memset(&vm->vm_hash0, 0, sizeof(struct vmem_hashlist));
	vm->vm_hashsize = 1;
	vm->vm_hashlist = &vm->vm_hash0;

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) != 0) {
			vmem_destroy1(vm);
			return NULL;
		}
	}

#if defined(_KERNEL)
	if (flags & VM_BOOTSTRAP) {
		bt_refill(vm, VM_NOSLEEP);
	}

	mutex_enter(&vmem_list_lock);
	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	return vm;
}

/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_init(NULL, name, base, size, quantum,
	    importfn, releasefn, source, qcache_max, flags, ipl);
}

/*
 * vmem_xcreate: create an arena, using an alternative import function.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_init(NULL, name, base, size, quantum,
	    (vmem_import_t *)importfn, releasefn, source,
	    qcache_max, flags | VM_XIMPORT, ipl);
}

void
vmem_destroy(vmem_t *vm)
{

#if defined(_KERNEL)
	mutex_enter(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	vmem_destroy1(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}

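/*
 * typical use of the allocation interfaces below (illustrative sketch
 * only; the arena name, base and sizes are made up):
 *
 *	vmem_t *arena;
 *	vmem_addr_t va;
 *
 *	arena = vmem_create("example", 0x1000, 0x10000, PAGE_SIZE,
 *	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
 *	if (vmem_alloc(arena, 3 * PAGE_SIZE, VM_SLEEP | VM_INSTANTFIT,
 *	    &va) == 0) {
 *		... use [va, va + 3 * PAGE_SIZE) ...
 *		vmem_free(arena, va, 3 * PAGE_SIZE);
 *	}
 *	vmem_destroy(arena);
 */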

/*
 * vmem_alloc:
 *
 * => caller must ensure appropriate spl,
 *	if the arena can be accessed from interrupt context.
 */

int
vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp)
{
	const vm_flag_t strat __unused = flags & VM_FITMASK;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE();
	}

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		void *p;
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags));
		if (addrp != NULL)
			*addrp = (vmem_addr_t)p;
		return (p == NULL) ? ENOMEM : 0;
	}
#endif /* defined(QCACHE) */

	return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
	    flags, addrp);
}

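/*
 * vmem_xalloc: allocate a region with extra constraints.
 *
 * => "align"/"phase": the address satisfies (addr & (align - 1)) == phase
 *    (align == 0 means quantum alignment).
 * => "nocross": the region must not cross a nocross-aligned boundary.
 * => "minaddr"/"maxaddr": inclusive bounds on the region.
 * => caller must ensure appropriate spl,
 *	if the arena can be accessed from interrupt context.
 */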
int
vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross,
    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags,
    vmem_addr_t *addrp)
{
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	bt_t *bt;
	bt_t *btnew;
	bt_t *btnew2;
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	vm_flag_t strat = flags & VM_FITMASK;
	vmem_addr_t start;
	int rc;

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE();
	}
	KASSERT((align & vm->vm_quantum_mask) == 0);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((phase & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & (nocross - 1)) == 0);
	KASSERT((align == 0 && phase == 0) || phase < align);
	KASSERT(nocross == 0 || nocross >= size);
	KASSERT(minaddr <= maxaddr);
	KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	if (align == 0) {
		align = vm->vm_quantum_mask + 1;
	}

	/*
	 * allocate boundary tags before acquiring the vmem lock.
	 */
	btnew = bt_alloc(vm, flags);
	if (btnew == NULL) {
		return ENOMEM;
	}
	btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
	if (btnew2 == NULL) {
		bt_free(vm, btnew);
		return ENOMEM;
	}

	/*
	 * choose a free block from which we allocate.
	 */
retry_strat:
	first = bt_freehead_toalloc(vm, size, strat);
	end = &vm->vm_freelist[VMEM_MAXORDER];
retry:
	bt = NULL;
	VMEM_LOCK(vm);
	vmem_check(vm);
	if (strat == VM_INSTANTFIT) {
		/*
		 * just choose the first block which satisfies our restrictions.
		 *
		 * note that we don't need to check the size of the blocks
		 * because any block found on these lists should be larger than
		 * the given size.
		 */
		for (list = first; list < end; list++) {
			bt = LIST_FIRST(list);
			if (bt != NULL) {
				rc = vmem_fit(bt, size, align, phase,
				    nocross, minaddr, maxaddr, &start);
				if (rc == 0) {
					goto gotit;
				}
				/*
				 * don't bother to follow the bt_freelist link
				 * here.  the list can be very long and we are
				 * told to run fast.  blocks from the later free
				 * lists are larger and have better chances of
				 * satisfying our restrictions.
				 */
			}
		}
	} else { /* VM_BESTFIT */
		/*
		 * we assume that, for space efficiency, it's better to
		 * allocate from a smaller block.  thus we will start searching
		 * from a lower-order list than VM_INSTANTFIT does.
		 * however, don't bother to find the smallest block in a free
		 * list because the list can be very long.  we can revisit it
		 * if/when it turns out to be a problem.
		 *
		 * note that the 'first' list can contain blocks smaller than
		 * the requested size.  thus we need to check bt_size.
		 */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					rc = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr, &start);
					if (rc == 0) {
						goto gotit;
					}
				}
			}
		}
	}
	VMEM_UNLOCK(vm);
#if 1
	if (strat == VM_INSTANTFIT) {
		strat = VM_BESTFIT;
		goto retry_strat;
	}
#endif
	if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) {

		/*
		 * XXX should try to import a region large enough to
		 * satisfy restrictions?
		 */

		goto fail;
	}
	/* XXX eeek, minaddr & maxaddr not respected */
	if (vmem_import(vm, size, flags) == 0) {
		goto retry;
	}
	/* XXX */

	if ((flags & VM_SLEEP) != 0) {
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
		mutex_spin_enter(&uvm_fpageqlock);
		uvm_kick_pdaemon();
		mutex_spin_exit(&uvm_fpageqlock);
#endif
		VMEM_LOCK(vm);
		VMEM_CONDVAR_WAIT(vm);
		VMEM_UNLOCK(vm);
		goto retry;
	}
fail:
	bt_free(vm, btnew);
	bt_free(vm, btnew2);
	return ENOMEM;

gotit:
	KASSERT(bt->bt_type == BT_TYPE_FREE);
	KASSERT(bt->bt_size >= size);
	bt_remfree(vm, bt);
	vmem_check(vm);
	if (bt->bt_start != start) {
		btnew2->bt_type = BT_TYPE_FREE;
		btnew2->bt_start = bt->bt_start;
		btnew2->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btnew2->bt_size;
		bt_insfree(vm, btnew2);
		bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
		btnew2 = NULL;
		vmem_check(vm);
	}
	KASSERT(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
		bt_insbusy(vm, btnew);
		vmem_check(vm);
		VMEM_UNLOCK(vm);
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
		vmem_check(vm);
		VMEM_UNLOCK(vm);
		bt_free(vm, btnew);
		btnew = bt;
	}
	if (btnew2 != NULL) {
		bt_free(vm, btnew2);
	}
	KASSERT(btnew->bt_size >= size);
	btnew->bt_type = BT_TYPE_BUSY;

	if (addrp != NULL)
		*addrp = btnew->bt_start;
	return 0;
}

/*
 * vmem_free:
 *
 * => caller must ensure appropriate spl,
 *	if the arena can be accessed from interrupt context.
 */

void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	KASSERT(size > 0);

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		pool_cache_put(qc->qc_cache, (void *)addr);
		return;
	}
#endif /* defined(QCACHE) */

	vmem_xfree(vm, addr, size);
}

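/*
 * vmem_xfree: free a region, coalescing it with adjacent free tags.
 *
 * if the freed region grows back to cover a whole imported (BT_TYPE_SPAN)
 * span, the span is returned to the source via vm_releasefn.  tags to be
 * discarded are collected on a local list and only freed once the arena
 * lock has been dropped.
 */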
void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;
	LIST_HEAD(, vmem_btag) tofree;

	LIST_INIT(&tofree);

	KASSERT(size > 0);

	VMEM_LOCK(vm);

	bt = bt_lookupbusy(vm, addr);
	KASSERT(bt != NULL);
	KASSERT(bt->bt_start == addr);
	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	KASSERT(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = CIRCLEQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) < t->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
	}
	t = CIRCLEQ_PREV(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) < bt->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
	}

	t = CIRCLEQ_PREV(bt, bt_seglist);
	KASSERT(t != NULL);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
		bt_remseg(vm, t);
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
		vm->vm_size -= spansize;
		VMEM_CONDVAR_BROADCAST(vm);
		VMEM_UNLOCK(vm);
		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_CONDVAR_BROADCAST(vm);
		VMEM_UNLOCK(vm);
	}

	while (!LIST_EMPTY(&tofree)) {
		t = LIST_FIRST(&tofree);
		LIST_REMOVE(t, bt_freelist);
		bt_free(vm, t);
	}
}

/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *	if the arena can be accessed from interrupt context.
 */

int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
}

/*
 * vmem_size: information about an arena's size
 *
 * => return the free/allocated/total size of the arena, depending on
 *    the requested typemask.
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{

	switch (typemask) {
	case VMEM_ALLOC:
		return vm->vm_inuse;
	case VMEM_FREE:
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		return vm->vm_size;
	default:
		panic("vmem_size");
	}
}

/* ---- rehash */

#if defined(_KERNEL)
static struct callout vmem_rehash_ch;
static int vmem_rehash_interval;
static struct workqueue *vmem_rehash_wq;
static struct work vmem_rehash_wk;

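/*
 * vmem_rehash_all: resize each arena's hash table of busy tags so that
 * its size stays within a factor of two of the number of busy tags
 * (clamped to [VMEM_HASHSIZE_MIN, VMEM_HASHSIZE_MAX]).  run periodically
 * from a callout via a workqueue.
 */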
static void
vmem_rehash_all(struct work *wk, void *dummy)
{
	vmem_t *vm;

	KASSERT(wk == &vmem_rehash_wk);
	mutex_enter(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		size_t desired;
		size_t current;

		if (!VMEM_TRYLOCK(vm)) {
			continue;
		}
		desired = vm->vm_nbusytag;
		current = vm->vm_hashsize;
		VMEM_UNLOCK(vm);

		if (desired > VMEM_HASHSIZE_MAX) {
			desired = VMEM_HASHSIZE_MAX;
		} else if (desired < VMEM_HASHSIZE_MIN) {
			desired = VMEM_HASHSIZE_MIN;
		}
		if (desired > current * 2 || desired * 2 < current) {
			vmem_rehash(vm, desired, VM_NOSLEEP);
		}
	}
	mutex_exit(&vmem_list_lock);

	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}

static void
vmem_rehash_all_kick(void *dummy)
{

	workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
}

void
vmem_rehash_start(void)
{
	int error;

	error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
	    vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE);
	if (error) {
		panic("%s: workqueue_create %d\n", __func__, error);
	}
	callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE);
	callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);

	vmem_rehash_interval = hz * 10;
	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}
#endif /* defined(_KERNEL) */

/* ---- debug */

#if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY)

static void bt_dump(const bt_t *, void (*)(const char *, ...)
    __printflike(1, 2));

static const char *
bt_type_string(int type)
{
	static const char * const table[] = {
		[BT_TYPE_BUSY] = "busy",
		[BT_TYPE_FREE] = "free",
		[BT_TYPE_SPAN] = "span",
		[BT_TYPE_SPAN_STATIC] = "static span",
	};

	if (type >= __arraycount(table)) {
		return "BOGUS";
	}
	return table[type];
}

static void
bt_dump(const bt_t *bt, void (*pr)(const char *, ...))
{

	(*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n",
	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
	    bt->bt_type, bt_type_string(bt->bt_type));
}

static void
vmem_dump(const vmem_t *vm, void (*pr)(const char *, ...)
    __printflike(1, 2))
{
	const bt_t *bt;
	int i;

	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt, pr);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		(*pr)("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt, pr);
		}
	}
}

#endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */

#if defined(DDB)
static bt_t *
vmem_whatis_lookup(vmem_t *vm, uintptr_t addr)
{
	bt_t *bt;

	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (BT_ISSPAN_P(bt)) {
			continue;
		}
		if (bt->bt_start <= addr && addr <= BT_END(bt)) {
			return bt;
		}
	}

	return NULL;
}

void
vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...))
{
	vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		bt_t *bt;

		bt = vmem_whatis_lookup(vm, addr);
		if (bt == NULL) {
			continue;
		}
		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
		    (void *)addr, (void *)bt->bt_start,
		    (size_t)(addr - bt->bt_start), vm->vm_name,
		    (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
	}
}

void
vmem_printall(const char *modif, void (*pr)(const char *, ...))
{
	const vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		vmem_dump(vm, pr);
	}
}

void
vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...))
{
	const vmem_t *vm = (const void *)addr;

	vmem_dump(vm, pr);
}
#endif /* defined(DDB) */

#if defined(_KERNEL)
#define vmem_printf printf
#else
#include <stdio.h>
#include <stdarg.h>

static void
vmem_printf(const char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
#endif

#if defined(VMEM_SANITY)

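/*
 * vmem_check_sanity: verify the consistency of the segment list:
 * every tag must have a non-inverted [start, end] range, and tags of the
 * same kind (span vs. non-span) must not overlap one another.  dumps the
 * offending tags and returns false if a check fails.
 */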
static bool
vmem_check_sanity(vmem_t *vm)
{
	const bt_t *bt, *bt2;

	KASSERT(vm != NULL);

	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (bt->bt_start > BT_END(bt)) {
			printf("corrupted tag\n");
			bt_dump(bt, vmem_printf);
			return false;
		}
	}
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		CIRCLEQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
			if (bt == bt2) {
				continue;
			}
			if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
				continue;
			}
			if (bt->bt_start <= BT_END(bt2) &&
			    bt2->bt_start <= BT_END(bt)) {
				printf("overlapping tags\n");
				bt_dump(bt, vmem_printf);
				bt_dump(bt2, vmem_printf);
				return false;
			}
		}
	}

	return true;
}

static void
vmem_check(vmem_t *vm)
{

	if (!vmem_check_sanity(vm)) {
		panic("insanity vmem %p", vm);
	}
}

#endif /* defined(VMEM_SANITY) */

#if defined(UNITTEST)
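/*
 * randomized stress test: seed the arena with a few spans, exercise some
 * boundary conditions with vmem_xalloc, then loop allocating (both plain
 * and constrained) and freeing random-sized regions, dumping the arena as
 * it goes, until a plain allocation fails.
 */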
int
main(void)
{
	int rc;
	vmem_t *vm;
	vmem_addr_t p;
	struct reg {
		vmem_addr_t p;
		vmem_size_t sz;
		bool x;
	} *reg = NULL;
	int nreg = 0;
	int nalloc = 0;
	int nfree = 0;
	vmem_size_t total = 0;
#if 1
	vm_flag_t strat = VM_INSTANTFIT;
#else
	vm_flag_t strat = VM_BESTFIT;
#endif

	vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP,
#ifdef _KERNEL
	    IPL_NONE
#else
	    0
#endif
	    );
	if (vm == NULL) {
		printf("vmem_create\n");
		exit(EXIT_FAILURE);
	}
	vmem_dump(vm, vmem_printf);

	rc = vmem_add(vm, 0, 50, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 100, 200, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 2000, 1, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 40000, 65536, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 10000, 10000, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 500, 1000, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_xalloc(vm, 0x101, 0, 0, 0,
	    0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p);
	assert(rc == 0 && p == 0);
	vmem_xfree(vm, p, 50);
	rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p);
	assert(rc == 0 && p == 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff01, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc == 0);
	vmem_dump(vm, vmem_printf);
	for (;;) {
		struct reg *r;
		int t = rand() % 100;

		if (t > 45) {
			/* alloc */
			vmem_size_t sz = rand() % 500 + 1;
			bool x;
			vmem_size_t align, phase, nocross;
			vmem_addr_t minaddr, maxaddr;

			if (t > 70) {
				x = true;
				/* XXX */
				align = 1 << (rand() % 15);
				phase = rand() % 65536;
				nocross = 1 << (rand() % 15);
				if (align <= phase) {
					phase = 0;
				}
				if (VMEM_CROSS_P(phase, phase + sz - 1,
				    nocross)) {
					nocross = 0;
				}
				do {
					minaddr = rand() % 50000;
					maxaddr = rand() % 70000;
				} while (minaddr > maxaddr);
				printf("=== xalloc %" PRIu64
				    " align=%" PRIu64 ", phase=%" PRIu64
				    ", nocross=%" PRIu64 ", min=%" PRIu64
				    ", max=%" PRIu64 "\n",
				    (uint64_t)sz,
				    (uint64_t)align,
				    (uint64_t)phase,
				    (uint64_t)nocross,
				    (uint64_t)minaddr,
				    (uint64_t)maxaddr);
				rc = vmem_xalloc(vm, sz, align, phase, nocross,
				    minaddr, maxaddr, strat|VM_SLEEP, &p);
			} else {
				x = false;
				printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
				rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p);
			}
			printf("-> %" PRIu64 "\n", (uint64_t)p);
			vmem_dump(vm, vmem_printf);
			if (rc != 0) {
				if (x) {
					continue;
				}
				break;
			}
			nreg++;
			reg = realloc(reg, sizeof(*reg) * nreg);
			r = &reg[nreg - 1];
			r->p = p;
			r->sz = sz;
			r->x = x;
			total += sz;
			nalloc++;
		} else if (nreg != 0) {
			/* free */
			r = &reg[rand() % nreg];
			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
			    (uint64_t)r->p, (uint64_t)r->sz);
			if (r->x) {
				vmem_xfree(vm, r->p, r->sz);
			} else {
				vmem_free(vm, r->p, r->sz);
			}
			total -= r->sz;
			vmem_dump(vm, vmem_printf);
			*r = reg[nreg - 1];
			nreg--;
			nfree++;
		}
		printf("total=%" PRIu64 "\n", (uint64_t)total);
	}
	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
	    (uint64_t)total, nalloc, nfree);
	exit(EXIT_SUCCESS);
}
#endif /* defined(UNITTEST) */