1 /* $NetBSD: subr_vmem.c,v 1.80 2013/01/29 21:26:24 para Exp $ */ 2 3 /*- 4 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * reference: 31 * - Magazines and Vmem: Extending the Slab Allocator 32 * to Many CPUs and Arbitrary Resources 33 * http://www.usenix.org/event/usenix01/bonwick.html 34 */ 35 36 #include <sys/cdefs.h> 37 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.80 2013/01/29 21:26:24 para Exp $"); 38 39 #if defined(_KERNEL) 40 #include "opt_ddb.h" 41 #endif /* defined(_KERNEL) */ 42 43 #include <sys/param.h> 44 #include <sys/hash.h> 45 #include <sys/queue.h> 46 #include <sys/bitops.h> 47 48 #if defined(_KERNEL) 49 #include <sys/systm.h> 50 #include <sys/kernel.h> /* hz */ 51 #include <sys/callout.h> 52 #include <sys/kmem.h> 53 #include <sys/pool.h> 54 #include <sys/vmem.h> 55 #include <sys/vmem_impl.h> 56 #include <sys/workqueue.h> 57 #include <sys/atomic.h> 58 #include <uvm/uvm.h> 59 #include <uvm/uvm_extern.h> 60 #include <uvm/uvm_km.h> 61 #include <uvm/uvm_page.h> 62 #include <uvm/uvm_pdaemon.h> 63 #else /* defined(_KERNEL) */ 64 #include <stdio.h> 65 #include <errno.h> 66 #include <assert.h> 67 #include <stdlib.h> 68 #include <string.h> 69 #include "../sys/vmem.h" 70 #include "../sys/vmem_impl.h" 71 #endif /* defined(_KERNEL) */ 72 73 74 #if defined(_KERNEL) 75 #include <sys/evcnt.h> 76 #define VMEM_EVCNT_DEFINE(name) \ 77 struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 78 "vmemev", #name); \ 79 EVCNT_ATTACH_STATIC(vmem_evcnt_##name); 80 #define VMEM_EVCNT_INCR(ev) vmem_evcnt_##ev.ev_count++ 81 #define VMEM_EVCNT_DECR(ev) vmem_evcnt_##ev.ev_count-- 82 83 VMEM_EVCNT_DEFINE(bt_pages) 84 VMEM_EVCNT_DEFINE(bt_count) 85 VMEM_EVCNT_DEFINE(bt_inuse) 86 87 #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan) 88 #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv) 89 #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock) 90 #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv) 91 92 #else /* defined(_KERNEL) */ 93 94 #define VMEM_EVCNT_INCR(ev) /* nothing */ 95 #define VMEM_EVCNT_DECR(ev) /* nothing */ 96 97 #define VMEM_CONDVAR_INIT(vm, wchan) /* nothing */ 98 #define VMEM_CONDVAR_DESTROY(vm) 
/* nothing */
#define VMEM_CONDVAR_WAIT(vm)		/* nothing */
#define VMEM_CONDVAR_BROADCAST(vm)	/* nothing */

#define UNITTEST
#define KASSERT(a)		assert(a)
#define mutex_init(a, b, c)	/* nothing */
#define mutex_destroy(a)	/* nothing */
#define mutex_enter(a)		/* nothing */
#define mutex_tryenter(a)	true
#define mutex_exit(a)		/* nothing */
#define mutex_owned(a)		/* nothing */
#define ASSERT_SLEEPABLE()	/* nothing */
#define panic(...)		printf(__VA_ARGS__); abort()
#endif /* defined(_KERNEL) */

#if defined(VMEM_SANITY)
static void vmem_check(vmem_t *);
#else /* defined(VMEM_SANITY) */
#define vmem_check(vm)	/* nothing */
#endif /* defined(VMEM_SANITY) */

#define VMEM_HASHSIZE_MIN	1	/* XXX */
#define VMEM_HASHSIZE_MAX	65536	/* XXX */
#define VMEM_HASHSIZE_INIT	1

#define VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

#if defined(_KERNEL)
static bool vmem_bootstrapped = false;
static kmutex_t vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
#endif /* defined(_KERNEL) */

/* ---- misc */

#define VMEM_LOCK(vm)		mutex_enter(&vm->vm_lock)
#define VMEM_TRYLOCK(vm)	mutex_tryenter(&vm->vm_lock)
#define VMEM_UNLOCK(vm)		mutex_exit(&vm->vm_lock)
#define VMEM_LOCK_INIT(vm, ipl)	mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
#define VMEM_LOCK_DESTROY(vm)	mutex_destroy(&vm->vm_lock)
#define VMEM_ASSERT_LOCKED(vm)	KASSERT(mutex_owned(&vm->vm_lock))

/* round addr up to the next multiple of align (align is a power of two) */
#define VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))

/* true iff addr1 and addr2 are in different boundary-aligned blocks */
#define VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define ORDER2SIZE(order)	((vmem_size_t)1 << (order))
#define SIZE2ORDER(size)	((int)ilog2(size))

#if !defined(_KERNEL)
#define xmalloc(sz, flags)	malloc(sz)
#define xfree(p, sz)		free(p)
#define bt_alloc(vm, flags)	malloc(sizeof(bt_t))
#define bt_free(vm, bt)		free(bt)
#else /* defined(_KERNEL) */

#define xmalloc(sz, flags) \
    kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
#define xfree(p, sz)		kmem_free(p, sz);

/*
 * BT_RESERVE calculation:
 * we allocate memory for boundary tags with vmem; therefore we have
 * to keep a reserve of bts used to allocate memory for bts.
 * This reserve is 4 for each arena involved in allocating vmem's memory.
166 * BT_MAXFREE: don't cache excessive counts of bts in arenas 167 */ 168 #define STATIC_BT_COUNT 200 169 #define BT_MINRESERVE 4 170 #define BT_MAXFREE 64 171 172 static struct vmem_btag static_bts[STATIC_BT_COUNT]; 173 static int static_bt_count = STATIC_BT_COUNT; 174 175 static struct vmem kmem_va_meta_arena_store; 176 vmem_t *kmem_va_meta_arena; 177 static struct vmem kmem_meta_arena_store; 178 vmem_t *kmem_meta_arena; 179 180 static kmutex_t vmem_refill_lock; 181 static kmutex_t vmem_btag_lock; 182 static LIST_HEAD(, vmem_btag) vmem_btag_freelist; 183 static size_t vmem_btag_freelist_count = 0; 184 static size_t vmem_btag_count = STATIC_BT_COUNT; 185 186 /* ---- boundary tag */ 187 188 #define BT_PER_PAGE (PAGE_SIZE / sizeof(bt_t)) 189 190 static int bt_refill(vmem_t *vm, vm_flag_t flags); 191 192 static int 193 bt_refillglobal(vm_flag_t flags) 194 { 195 vmem_addr_t va; 196 bt_t *btp; 197 bt_t *bt; 198 int i; 199 200 mutex_enter(&vmem_refill_lock); 201 202 mutex_enter(&vmem_btag_lock); 203 if (vmem_btag_freelist_count > 0) { 204 mutex_exit(&vmem_btag_lock); 205 mutex_exit(&vmem_refill_lock); 206 return 0; 207 } else { 208 mutex_exit(&vmem_btag_lock); 209 } 210 211 if (vmem_alloc(kmem_meta_arena, PAGE_SIZE, 212 (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va) != 0) { 213 mutex_exit(&vmem_btag_lock); 214 mutex_exit(&vmem_refill_lock); 215 return ENOMEM; 216 } 217 VMEM_EVCNT_INCR(bt_pages); 218 219 mutex_enter(&vmem_btag_lock); 220 btp = (void *) va; 221 for (i = 0; i < (BT_PER_PAGE); i++) { 222 bt = btp; 223 memset(bt, 0, sizeof(*bt)); 224 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, 225 bt_freelist); 226 vmem_btag_freelist_count++; 227 vmem_btag_count++; 228 VMEM_EVCNT_INCR(bt_count); 229 btp++; 230 } 231 mutex_exit(&vmem_btag_lock); 232 233 bt_refill(kmem_arena, (flags & ~VM_FITMASK) 234 | VM_INSTANTFIT | VM_POPULATING); 235 bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK) 236 | VM_INSTANTFIT | VM_POPULATING); 237 bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK) 238 | VM_INSTANTFIT | VM_POPULATING); 239 240 mutex_exit(&vmem_refill_lock); 241 242 return 0; 243 } 244 245 static int 246 bt_refill(vmem_t *vm, vm_flag_t flags) 247 { 248 bt_t *bt; 249 250 if (!(flags & VM_POPULATING)) { 251 bt_refillglobal(flags); 252 } 253 254 VMEM_LOCK(vm); 255 mutex_enter(&vmem_btag_lock); 256 while (!LIST_EMPTY(&vmem_btag_freelist) && 257 vm->vm_nfreetags <= BT_MINRESERVE) { 258 bt = LIST_FIRST(&vmem_btag_freelist); 259 LIST_REMOVE(bt, bt_freelist); 260 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 261 vm->vm_nfreetags++; 262 vmem_btag_freelist_count--; 263 } 264 mutex_exit(&vmem_btag_lock); 265 266 if (vm->vm_nfreetags == 0) { 267 VMEM_UNLOCK(vm); 268 return ENOMEM; 269 } 270 VMEM_UNLOCK(vm); 271 272 return 0; 273 } 274 275 static inline bt_t * 276 bt_alloc(vmem_t *vm, vm_flag_t flags) 277 { 278 bt_t *bt; 279 again: 280 VMEM_LOCK(vm); 281 if (vm->vm_nfreetags <= BT_MINRESERVE && 282 (flags & VM_POPULATING) == 0) { 283 VMEM_UNLOCK(vm); 284 if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) { 285 return NULL; 286 } 287 goto again; 288 } 289 bt = LIST_FIRST(&vm->vm_freetags); 290 LIST_REMOVE(bt, bt_freelist); 291 vm->vm_nfreetags--; 292 VMEM_UNLOCK(vm); 293 VMEM_EVCNT_INCR(bt_inuse); 294 295 return bt; 296 } 297 298 static inline void 299 bt_free(vmem_t *vm, bt_t *bt) 300 { 301 302 VMEM_LOCK(vm); 303 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 304 vm->vm_nfreetags++; 305 while (vm->vm_nfreetags > BT_MAXFREE) { 306 bt = LIST_FIRST(&vm->vm_freetags); 307 LIST_REMOVE(bt, bt_freelist); 
308 vm->vm_nfreetags--; 309 mutex_enter(&vmem_btag_lock); 310 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 311 vmem_btag_freelist_count++; 312 mutex_exit(&vmem_btag_lock); 313 } 314 VMEM_UNLOCK(vm); 315 VMEM_EVCNT_DECR(bt_inuse); 316 } 317 318 #endif /* defined(_KERNEL) */ 319 320 /* 321 * freelist[0] ... [1, 1] 322 * freelist[1] ... [2, 3] 323 * freelist[2] ... [4, 7] 324 * freelist[3] ... [8, 15] 325 * : 326 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1] 327 * : 328 */ 329 330 static struct vmem_freelist * 331 bt_freehead_tofree(vmem_t *vm, vmem_size_t size) 332 { 333 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 334 const int idx = SIZE2ORDER(qsize); 335 336 KASSERT(size != 0 && qsize != 0); 337 KASSERT((size & vm->vm_quantum_mask) == 0); 338 KASSERT(idx >= 0); 339 KASSERT(idx < VMEM_MAXORDER); 340 341 return &vm->vm_freelist[idx]; 342 } 343 344 /* 345 * bt_freehead_toalloc: return the freelist for the given size and allocation 346 * strategy. 347 * 348 * for VM_INSTANTFIT, return the list in which any blocks are large enough 349 * for the requested size. otherwise, return the list which can have blocks 350 * large enough for the requested size. 351 */ 352 353 static struct vmem_freelist * 354 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat) 355 { 356 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 357 int idx = SIZE2ORDER(qsize); 358 359 KASSERT(size != 0 && qsize != 0); 360 KASSERT((size & vm->vm_quantum_mask) == 0); 361 362 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) { 363 idx++; 364 /* check too large request? */ 365 } 366 KASSERT(idx >= 0); 367 KASSERT(idx < VMEM_MAXORDER); 368 369 return &vm->vm_freelist[idx]; 370 } 371 372 /* ---- boundary tag hash */ 373 374 static struct vmem_hashlist * 375 bt_hashhead(vmem_t *vm, vmem_addr_t addr) 376 { 377 struct vmem_hashlist *list; 378 unsigned int hash; 379 380 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT); 381 list = &vm->vm_hashlist[hash % vm->vm_hashsize]; 382 383 return list; 384 } 385 386 static bt_t * 387 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr) 388 { 389 struct vmem_hashlist *list; 390 bt_t *bt; 391 392 list = bt_hashhead(vm, addr); 393 LIST_FOREACH(bt, list, bt_hashlist) { 394 if (bt->bt_start == addr) { 395 break; 396 } 397 } 398 399 return bt; 400 } 401 402 static void 403 bt_rembusy(vmem_t *vm, bt_t *bt) 404 { 405 406 KASSERT(vm->vm_nbusytag > 0); 407 vm->vm_inuse -= bt->bt_size; 408 vm->vm_nbusytag--; 409 LIST_REMOVE(bt, bt_hashlist); 410 } 411 412 static void 413 bt_insbusy(vmem_t *vm, bt_t *bt) 414 { 415 struct vmem_hashlist *list; 416 417 KASSERT(bt->bt_type == BT_TYPE_BUSY); 418 419 list = bt_hashhead(vm, bt->bt_start); 420 LIST_INSERT_HEAD(list, bt, bt_hashlist); 421 vm->vm_nbusytag++; 422 vm->vm_inuse += bt->bt_size; 423 } 424 425 /* ---- boundary tag list */ 426 427 static void 428 bt_remseg(vmem_t *vm, bt_t *bt) 429 { 430 431 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist); 432 } 433 434 static void 435 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev) 436 { 437 438 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist); 439 } 440 441 static void 442 bt_insseg_tail(vmem_t *vm, bt_t *bt) 443 { 444 445 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist); 446 } 447 448 static void 449 bt_remfree(vmem_t *vm, bt_t *bt) 450 { 451 452 KASSERT(bt->bt_type == BT_TYPE_FREE); 453 454 LIST_REMOVE(bt, bt_freelist); 455 } 456 457 static void 458 bt_insfree(vmem_t *vm, bt_t *bt) 459 { 460 struct vmem_freelist *list; 461 462 list = bt_freehead_tofree(vm, 
bt->bt_size); 463 LIST_INSERT_HEAD(list, bt, bt_freelist); 464 } 465 466 /* ---- vmem internal functions */ 467 468 #if defined(QCACHE) 469 static inline vm_flag_t 470 prf_to_vmf(int prflags) 471 { 472 vm_flag_t vmflags; 473 474 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0); 475 if ((prflags & PR_WAITOK) != 0) { 476 vmflags = VM_SLEEP; 477 } else { 478 vmflags = VM_NOSLEEP; 479 } 480 return vmflags; 481 } 482 483 static inline int 484 vmf_to_prf(vm_flag_t vmflags) 485 { 486 int prflags; 487 488 if ((vmflags & VM_SLEEP) != 0) { 489 prflags = PR_WAITOK; 490 } else { 491 prflags = PR_NOWAIT; 492 } 493 return prflags; 494 } 495 496 static size_t 497 qc_poolpage_size(size_t qcache_max) 498 { 499 int i; 500 501 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) { 502 /* nothing */ 503 } 504 return ORDER2SIZE(i); 505 } 506 507 static void * 508 qc_poolpage_alloc(struct pool *pool, int prflags) 509 { 510 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 511 vmem_t *vm = qc->qc_vmem; 512 vmem_addr_t addr; 513 514 if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz, 515 prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0) 516 return NULL; 517 return (void *)addr; 518 } 519 520 static void 521 qc_poolpage_free(struct pool *pool, void *addr) 522 { 523 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 524 vmem_t *vm = qc->qc_vmem; 525 526 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz); 527 } 528 529 static void 530 qc_init(vmem_t *vm, size_t qcache_max, int ipl) 531 { 532 qcache_t *prevqc; 533 struct pool_allocator *pa; 534 int qcache_idx_max; 535 int i; 536 537 KASSERT((qcache_max & vm->vm_quantum_mask) == 0); 538 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) { 539 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift; 540 } 541 vm->vm_qcache_max = qcache_max; 542 pa = &vm->vm_qcache_allocator; 543 memset(pa, 0, sizeof(*pa)); 544 pa->pa_alloc = qc_poolpage_alloc; 545 pa->pa_free = qc_poolpage_free; 546 pa->pa_pagesz = qc_poolpage_size(qcache_max); 547 548 qcache_idx_max = qcache_max >> vm->vm_quantum_shift; 549 prevqc = NULL; 550 for (i = qcache_idx_max; i > 0; i--) { 551 qcache_t *qc = &vm->vm_qcache_store[i - 1]; 552 size_t size = i << vm->vm_quantum_shift; 553 pool_cache_t pc; 554 555 qc->qc_vmem = vm; 556 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu", 557 vm->vm_name, size); 558 559 pc = pool_cache_init(size, 560 ORDER2SIZE(vm->vm_quantum_shift), 0, 561 PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */, 562 qc->qc_name, pa, ipl, NULL, NULL, NULL); 563 564 KASSERT(pc); 565 566 qc->qc_cache = pc; 567 KASSERT(qc->qc_cache != NULL); /* XXX */ 568 if (prevqc != NULL && 569 qc->qc_cache->pc_pool.pr_itemsperpage == 570 prevqc->qc_cache->pc_pool.pr_itemsperpage) { 571 pool_cache_destroy(qc->qc_cache); 572 vm->vm_qcache[i - 1] = prevqc; 573 continue; 574 } 575 qc->qc_cache->pc_pool.pr_qcache = qc; 576 vm->vm_qcache[i - 1] = qc; 577 prevqc = qc; 578 } 579 } 580 581 static void 582 qc_destroy(vmem_t *vm) 583 { 584 const qcache_t *prevqc; 585 int i; 586 int qcache_idx_max; 587 588 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift; 589 prevqc = NULL; 590 for (i = 0; i < qcache_idx_max; i++) { 591 qcache_t *qc = vm->vm_qcache[i]; 592 593 if (prevqc == qc) { 594 continue; 595 } 596 pool_cache_destroy(qc->qc_cache); 597 prevqc = qc; 598 } 599 } 600 #endif 601 602 #if defined(_KERNEL) 603 static void 604 vmem_bootstrap(void) 605 { 606 607 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM); 608 mutex_init(&vmem_refill_lock, MUTEX_DEFAULT, IPL_VM); 609 mutex_init(&vmem_btag_lock, 
MUTEX_DEFAULT, IPL_VM); 610 611 while (static_bt_count-- > 0) { 612 bt_t *bt = &static_bts[static_bt_count]; 613 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 614 VMEM_EVCNT_INCR(bt_count); 615 vmem_btag_freelist_count++; 616 } 617 vmem_bootstrapped = TRUE; 618 } 619 620 void 621 vmem_subsystem_init(vmem_t *vm) 622 { 623 624 kmem_va_meta_arena = vmem_init(&kmem_va_meta_arena_store, "vmem-va", 625 0, 0, PAGE_SIZE, vmem_alloc, vmem_free, vm, 626 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT, 627 IPL_VM); 628 629 kmem_meta_arena = vmem_init(&kmem_meta_arena_store, "vmem-meta", 630 0, 0, PAGE_SIZE, 631 uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena, 632 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); 633 } 634 #endif /* defined(_KERNEL) */ 635 636 static int 637 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags, 638 int spanbttype) 639 { 640 bt_t *btspan; 641 bt_t *btfree; 642 643 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 644 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 645 KASSERT(spanbttype == BT_TYPE_SPAN || 646 spanbttype == BT_TYPE_SPAN_STATIC); 647 648 btspan = bt_alloc(vm, flags); 649 if (btspan == NULL) { 650 return ENOMEM; 651 } 652 btfree = bt_alloc(vm, flags); 653 if (btfree == NULL) { 654 bt_free(vm, btspan); 655 return ENOMEM; 656 } 657 658 btspan->bt_type = spanbttype; 659 btspan->bt_start = addr; 660 btspan->bt_size = size; 661 662 btfree->bt_type = BT_TYPE_FREE; 663 btfree->bt_start = addr; 664 btfree->bt_size = size; 665 666 VMEM_LOCK(vm); 667 bt_insseg_tail(vm, btspan); 668 bt_insseg(vm, btfree, btspan); 669 bt_insfree(vm, btfree); 670 vm->vm_size += size; 671 VMEM_UNLOCK(vm); 672 673 return 0; 674 } 675 676 static void 677 vmem_destroy1(vmem_t *vm) 678 { 679 680 #if defined(QCACHE) 681 qc_destroy(vm); 682 #endif /* defined(QCACHE) */ 683 if (vm->vm_hashlist != NULL) { 684 int i; 685 686 for (i = 0; i < vm->vm_hashsize; i++) { 687 bt_t *bt; 688 689 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) { 690 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC); 691 bt_free(vm, bt); 692 } 693 } 694 if (vm->vm_hashlist != &vm->vm_hash0) { 695 xfree(vm->vm_hashlist, 696 sizeof(struct vmem_hashlist *) * vm->vm_hashsize); 697 } 698 } 699 700 while (vm->vm_nfreetags > 0) { 701 bt_t *bt = LIST_FIRST(&vm->vm_freetags); 702 LIST_REMOVE(bt, bt_freelist); 703 vm->vm_nfreetags--; 704 mutex_enter(&vmem_btag_lock); 705 #if defined (_KERNEL) 706 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 707 vmem_btag_freelist_count++; 708 #endif /* defined(_KERNEL) */ 709 mutex_exit(&vmem_btag_lock); 710 } 711 712 VMEM_CONDVAR_DESTROY(vm); 713 VMEM_LOCK_DESTROY(vm); 714 xfree(vm, sizeof(*vm)); 715 } 716 717 static int 718 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags) 719 { 720 vmem_addr_t addr; 721 int rc; 722 723 if (vm->vm_importfn == NULL) { 724 return EINVAL; 725 } 726 727 if (vm->vm_flags & VM_LARGEIMPORT) { 728 size *= 16; 729 } 730 731 if (vm->vm_flags & VM_XIMPORT) { 732 rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size, 733 &size, flags, &addr); 734 } else { 735 rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr); 736 } 737 if (rc) { 738 return ENOMEM; 739 } 740 741 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) { 742 (*vm->vm_releasefn)(vm->vm_arg, addr, size); 743 return ENOMEM; 744 } 745 746 return 0; 747 } 748 749 static int 750 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags) 751 { 752 bt_t *bt; 753 int i; 754 struct vmem_hashlist *newhashlist; 755 struct vmem_hashlist *oldhashlist; 756 size_t 
oldhashsize; 757 758 KASSERT(newhashsize > 0); 759 760 newhashlist = 761 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags); 762 if (newhashlist == NULL) { 763 return ENOMEM; 764 } 765 for (i = 0; i < newhashsize; i++) { 766 LIST_INIT(&newhashlist[i]); 767 } 768 769 if (!VMEM_TRYLOCK(vm)) { 770 xfree(newhashlist, 771 sizeof(struct vmem_hashlist *) * newhashsize); 772 return EBUSY; 773 } 774 oldhashlist = vm->vm_hashlist; 775 oldhashsize = vm->vm_hashsize; 776 vm->vm_hashlist = newhashlist; 777 vm->vm_hashsize = newhashsize; 778 if (oldhashlist == NULL) { 779 VMEM_UNLOCK(vm); 780 return 0; 781 } 782 for (i = 0; i < oldhashsize; i++) { 783 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) { 784 bt_rembusy(vm, bt); /* XXX */ 785 bt_insbusy(vm, bt); 786 } 787 } 788 VMEM_UNLOCK(vm); 789 790 if (oldhashlist != &vm->vm_hash0) { 791 xfree(oldhashlist, 792 sizeof(struct vmem_hashlist *) * oldhashsize); 793 } 794 795 return 0; 796 } 797 798 /* 799 * vmem_fit: check if a bt can satisfy the given restrictions. 800 * 801 * it's a caller's responsibility to ensure the region is big enough 802 * before calling us. 803 */ 804 805 static int 806 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, 807 vmem_size_t phase, vmem_size_t nocross, 808 vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp) 809 { 810 vmem_addr_t start; 811 vmem_addr_t end; 812 813 KASSERT(size > 0); 814 KASSERT(bt->bt_size >= size); /* caller's responsibility */ 815 816 /* 817 * XXX assumption: vmem_addr_t and vmem_size_t are 818 * unsigned integer of the same size. 819 */ 820 821 start = bt->bt_start; 822 if (start < minaddr) { 823 start = minaddr; 824 } 825 end = BT_END(bt); 826 if (end > maxaddr) { 827 end = maxaddr; 828 } 829 if (start > end) { 830 return ENOMEM; 831 } 832 833 start = VMEM_ALIGNUP(start - phase, align) + phase; 834 if (start < bt->bt_start) { 835 start += align; 836 } 837 if (VMEM_CROSS_P(start, start + size - 1, nocross)) { 838 KASSERT(align < nocross); 839 start = VMEM_ALIGNUP(start - phase, nocross) + phase; 840 } 841 if (start <= end && end - start >= size - 1) { 842 KASSERT((start & (align - 1)) == phase); 843 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross)); 844 KASSERT(minaddr <= start); 845 KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr); 846 KASSERT(bt->bt_start <= start); 847 KASSERT(BT_END(bt) - start >= size - 1); 848 *addrp = start; 849 return 0; 850 } 851 return ENOMEM; 852 } 853 854 /* ---- vmem API */ 855 856 /* 857 * vmem_create_internal: creates a vmem arena. 858 */ 859 860 vmem_t * 861 vmem_init(vmem_t *vm, const char *name, 862 vmem_addr_t base, vmem_size_t size, vmem_size_t quantum, 863 vmem_import_t *importfn, vmem_release_t *releasefn, 864 vmem_t *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 865 { 866 int i; 867 868 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 869 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 870 KASSERT(quantum > 0); 871 872 #if defined(_KERNEL) 873 /* XXX: SMP, we get called early... 
 */
	if (!vmem_bootstrapped) {
		vmem_bootstrap();
	}
#endif /* defined(_KERNEL) */

	if (vm == NULL) {
		vm = xmalloc(sizeof(*vm), flags);
	}
	if (vm == NULL) {
		return NULL;
	}

	VMEM_CONDVAR_INIT(vm, "vmem");
	VMEM_LOCK_INIT(vm, ipl);
	vm->vm_flags = flags;
	vm->vm_nfreetags = 0;
	LIST_INIT(&vm->vm_freetags);
	strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = SIZE2ORDER(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_importfn = importfn;
	vm->vm_releasefn = releasefn;
	vm->vm_arg = arg;
	vm->vm_nbusytag = 0;
	vm->vm_size = 0;
	vm->vm_inuse = 0;
#if defined(QCACHE)
	qc_init(vm, qcache_max, ipl);
#endif /* defined(QCACHE) */

	CIRCLEQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	memset(&vm->vm_hash0, 0, sizeof(struct vmem_hashlist));
	vm->vm_hashsize = 1;
	vm->vm_hashlist = &vm->vm_hash0;

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) != 0) {
			vmem_destroy1(vm);
			return NULL;
		}
	}

#if defined(_KERNEL)
	if (flags & VM_BOOTSTRAP) {
		bt_refill(vm, VM_NOSLEEP);
	}

	mutex_enter(&vmem_list_lock);
	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	return vm;
}



/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_init(NULL, name, base, size, quantum,
	    importfn, releasefn, source, qcache_max, flags, ipl);
}

/*
 * vmem_xcreate: create an arena that takes an alternative import function.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_init(NULL, name, base, size, quantum,
	    (vmem_import_t *)importfn, releasefn, source,
	    qcache_max, flags | VM_XIMPORT, ipl);
}

void
vmem_destroy(vmem_t *vm)
{

#if defined(_KERNEL)
	mutex_enter(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	vmem_destroy1(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}

/*
 * vmem_alloc:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
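 *
 * Minimal usage sketch (added for illustration; "arena" and "sz" are
 * placeholders, not identifiers used elsewhere in this file):
 *
 *	vmem_addr_t va;
 *
 *	if (vmem_alloc(arena, sz, VM_SLEEP | VM_INSTANTFIT, &va) == 0) {
 *		... use [va, va + sz) ...
 *		vmem_free(arena, va, sz);
 *	}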
997 */ 998 999 int 1000 vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp) 1001 { 1002 const vm_flag_t strat __unused = flags & VM_FITMASK; 1003 1004 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1005 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1006 1007 KASSERT(size > 0); 1008 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1009 if ((flags & VM_SLEEP) != 0) { 1010 ASSERT_SLEEPABLE(); 1011 } 1012 1013 #if defined(QCACHE) 1014 if (size <= vm->vm_qcache_max) { 1015 void *p; 1016 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1017 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1018 1019 p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags)); 1020 if (addrp != NULL) 1021 *addrp = (vmem_addr_t)p; 1022 return (p == NULL) ? ENOMEM : 0; 1023 } 1024 #endif /* defined(QCACHE) */ 1025 1026 return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, 1027 flags, addrp); 1028 } 1029 1030 int 1031 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align, 1032 const vmem_size_t phase, const vmem_size_t nocross, 1033 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags, 1034 vmem_addr_t *addrp) 1035 { 1036 struct vmem_freelist *list; 1037 struct vmem_freelist *first; 1038 struct vmem_freelist *end; 1039 bt_t *bt; 1040 bt_t *btnew; 1041 bt_t *btnew2; 1042 const vmem_size_t size = vmem_roundup_size(vm, size0); 1043 vm_flag_t strat = flags & VM_FITMASK; 1044 vmem_addr_t start; 1045 int rc; 1046 1047 KASSERT(size0 > 0); 1048 KASSERT(size > 0); 1049 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1050 if ((flags & VM_SLEEP) != 0) { 1051 ASSERT_SLEEPABLE(); 1052 } 1053 KASSERT((align & vm->vm_quantum_mask) == 0); 1054 KASSERT((align & (align - 1)) == 0); 1055 KASSERT((phase & vm->vm_quantum_mask) == 0); 1056 KASSERT((nocross & vm->vm_quantum_mask) == 0); 1057 KASSERT((nocross & (nocross - 1)) == 0); 1058 KASSERT((align == 0 && phase == 0) || phase < align); 1059 KASSERT(nocross == 0 || nocross >= size); 1060 KASSERT(minaddr <= maxaddr); 1061 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross)); 1062 1063 if (align == 0) { 1064 align = vm->vm_quantum_mask + 1; 1065 } 1066 1067 /* 1068 * allocate boundary tags before acquiring the vmem lock. 1069 */ 1070 btnew = bt_alloc(vm, flags); 1071 if (btnew == NULL) { 1072 return ENOMEM; 1073 } 1074 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */ 1075 if (btnew2 == NULL) { 1076 bt_free(vm, btnew); 1077 return ENOMEM; 1078 } 1079 1080 /* 1081 * choose a free block from which we allocate. 1082 */ 1083 retry_strat: 1084 first = bt_freehead_toalloc(vm, size, strat); 1085 end = &vm->vm_freelist[VMEM_MAXORDER]; 1086 retry: 1087 bt = NULL; 1088 VMEM_LOCK(vm); 1089 vmem_check(vm); 1090 if (strat == VM_INSTANTFIT) { 1091 /* 1092 * just choose the first block which satisfies our restrictions. 1093 * 1094 * note that we don't need to check the size of the blocks 1095 * because any blocks found on these list should be larger than 1096 * the given size. 1097 */ 1098 for (list = first; list < end; list++) { 1099 bt = LIST_FIRST(list); 1100 if (bt != NULL) { 1101 rc = vmem_fit(bt, size, align, phase, 1102 nocross, minaddr, maxaddr, &start); 1103 if (rc == 0) { 1104 goto gotit; 1105 } 1106 /* 1107 * don't bother to follow the bt_freelist link 1108 * here. the list can be very long and we are 1109 * told to run fast. blocks from the later free 1110 * lists are larger and have better chances to 1111 * satisfy our restrictions. 
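 *
 * worked example (added for illustration): a request of three
 * quanta has SIZE2ORDER(3) == 1; instant-fit bumps the index and
 * scans from freelist[2] (sizes 4..7), where any entry is large
 * enough, while best-fit below starts at freelist[1] (sizes 2..3)
 * and therefore has to compare bt_size.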
1112 */ 1113 } 1114 } 1115 } else { /* VM_BESTFIT */ 1116 /* 1117 * we assume that, for space efficiency, it's better to 1118 * allocate from a smaller block. thus we will start searching 1119 * from the lower-order list than VM_INSTANTFIT. 1120 * however, don't bother to find the smallest block in a free 1121 * list because the list can be very long. we can revisit it 1122 * if/when it turns out to be a problem. 1123 * 1124 * note that the 'first' list can contain blocks smaller than 1125 * the requested size. thus we need to check bt_size. 1126 */ 1127 for (list = first; list < end; list++) { 1128 LIST_FOREACH(bt, list, bt_freelist) { 1129 if (bt->bt_size >= size) { 1130 rc = vmem_fit(bt, size, align, phase, 1131 nocross, minaddr, maxaddr, &start); 1132 if (rc == 0) { 1133 goto gotit; 1134 } 1135 } 1136 } 1137 } 1138 } 1139 VMEM_UNLOCK(vm); 1140 #if 1 1141 if (strat == VM_INSTANTFIT) { 1142 strat = VM_BESTFIT; 1143 goto retry_strat; 1144 } 1145 #endif 1146 if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) { 1147 1148 /* 1149 * XXX should try to import a region large enough to 1150 * satisfy restrictions? 1151 */ 1152 1153 goto fail; 1154 } 1155 /* XXX eeek, minaddr & maxaddr not respected */ 1156 if (vmem_import(vm, size, flags) == 0) { 1157 goto retry; 1158 } 1159 /* XXX */ 1160 1161 if ((flags & VM_SLEEP) != 0) { 1162 #if defined(_KERNEL) && !defined(_RUMPKERNEL) 1163 mutex_spin_enter(&uvm_fpageqlock); 1164 uvm_kick_pdaemon(); 1165 mutex_spin_exit(&uvm_fpageqlock); 1166 #endif 1167 VMEM_LOCK(vm); 1168 VMEM_CONDVAR_WAIT(vm); 1169 VMEM_UNLOCK(vm); 1170 goto retry; 1171 } 1172 fail: 1173 bt_free(vm, btnew); 1174 bt_free(vm, btnew2); 1175 return ENOMEM; 1176 1177 gotit: 1178 KASSERT(bt->bt_type == BT_TYPE_FREE); 1179 KASSERT(bt->bt_size >= size); 1180 bt_remfree(vm, bt); 1181 vmem_check(vm); 1182 if (bt->bt_start != start) { 1183 btnew2->bt_type = BT_TYPE_FREE; 1184 btnew2->bt_start = bt->bt_start; 1185 btnew2->bt_size = start - bt->bt_start; 1186 bt->bt_start = start; 1187 bt->bt_size -= btnew2->bt_size; 1188 bt_insfree(vm, btnew2); 1189 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist)); 1190 btnew2 = NULL; 1191 vmem_check(vm); 1192 } 1193 KASSERT(bt->bt_start == start); 1194 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) { 1195 /* split */ 1196 btnew->bt_type = BT_TYPE_BUSY; 1197 btnew->bt_start = bt->bt_start; 1198 btnew->bt_size = size; 1199 bt->bt_start = bt->bt_start + size; 1200 bt->bt_size -= size; 1201 bt_insfree(vm, bt); 1202 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist)); 1203 bt_insbusy(vm, btnew); 1204 vmem_check(vm); 1205 VMEM_UNLOCK(vm); 1206 } else { 1207 bt->bt_type = BT_TYPE_BUSY; 1208 bt_insbusy(vm, bt); 1209 vmem_check(vm); 1210 VMEM_UNLOCK(vm); 1211 bt_free(vm, btnew); 1212 btnew = bt; 1213 } 1214 if (btnew2 != NULL) { 1215 bt_free(vm, btnew2); 1216 } 1217 KASSERT(btnew->bt_size >= size); 1218 btnew->bt_type = BT_TYPE_BUSY; 1219 1220 if (addrp != NULL) 1221 *addrp = btnew->bt_start; 1222 return 0; 1223 } 1224 1225 /* 1226 * vmem_free: 1227 * 1228 * => caller must ensure appropriate spl, 1229 * if the arena can be accessed from interrupt context. 
1230 */ 1231 1232 void 1233 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1234 { 1235 1236 KASSERT(size > 0); 1237 1238 #if defined(QCACHE) 1239 if (size <= vm->vm_qcache_max) { 1240 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1241 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1242 1243 pool_cache_put(qc->qc_cache, (void *)addr); 1244 return; 1245 } 1246 #endif /* defined(QCACHE) */ 1247 1248 vmem_xfree(vm, addr, size); 1249 } 1250 1251 void 1252 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1253 { 1254 bt_t *bt; 1255 bt_t *t; 1256 LIST_HEAD(, vmem_btag) tofree; 1257 1258 LIST_INIT(&tofree); 1259 1260 KASSERT(size > 0); 1261 1262 VMEM_LOCK(vm); 1263 1264 bt = bt_lookupbusy(vm, addr); 1265 KASSERT(bt != NULL); 1266 KASSERT(bt->bt_start == addr); 1267 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) || 1268 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask); 1269 KASSERT(bt->bt_type == BT_TYPE_BUSY); 1270 bt_rembusy(vm, bt); 1271 bt->bt_type = BT_TYPE_FREE; 1272 1273 /* coalesce */ 1274 t = CIRCLEQ_NEXT(bt, bt_seglist); 1275 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1276 KASSERT(BT_END(bt) < t->bt_start); /* YYY */ 1277 bt_remfree(vm, t); 1278 bt_remseg(vm, t); 1279 bt->bt_size += t->bt_size; 1280 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1281 } 1282 t = CIRCLEQ_PREV(bt, bt_seglist); 1283 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1284 KASSERT(BT_END(t) < bt->bt_start); /* YYY */ 1285 bt_remfree(vm, t); 1286 bt_remseg(vm, t); 1287 bt->bt_size += t->bt_size; 1288 bt->bt_start = t->bt_start; 1289 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1290 } 1291 1292 t = CIRCLEQ_PREV(bt, bt_seglist); 1293 KASSERT(t != NULL); 1294 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY); 1295 if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN && 1296 t->bt_size == bt->bt_size) { 1297 vmem_addr_t spanaddr; 1298 vmem_size_t spansize; 1299 1300 KASSERT(t->bt_start == bt->bt_start); 1301 spanaddr = bt->bt_start; 1302 spansize = bt->bt_size; 1303 bt_remseg(vm, bt); 1304 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 1305 bt_remseg(vm, t); 1306 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1307 vm->vm_size -= spansize; 1308 VMEM_CONDVAR_BROADCAST(vm); 1309 VMEM_UNLOCK(vm); 1310 (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize); 1311 } else { 1312 bt_insfree(vm, bt); 1313 VMEM_CONDVAR_BROADCAST(vm); 1314 VMEM_UNLOCK(vm); 1315 } 1316 1317 while (!LIST_EMPTY(&tofree)) { 1318 t = LIST_FIRST(&tofree); 1319 LIST_REMOVE(t, bt_freelist); 1320 bt_free(vm, t); 1321 } 1322 } 1323 1324 /* 1325 * vmem_add: 1326 * 1327 * => caller must ensure appropriate spl, 1328 * if the arena can be accessed from interrupt context. 
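 *
 * Example sketch (added for illustration; the address range is
 * arbitrary): make [0x100000, 0x100000 + 0x10000) available for
 * allocation from an already created arena vm:
 *
 *	if (vmem_add(vm, 0x100000, 0x10000, VM_SLEEP) != 0)
 *		... handle failure ...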
1329 */ 1330 1331 int 1332 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags) 1333 { 1334 1335 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC); 1336 } 1337 1338 /* 1339 * vmem_size: information about arenas size 1340 * 1341 * => return free/allocated size in arena 1342 */ 1343 vmem_size_t 1344 vmem_size(vmem_t *vm, int typemask) 1345 { 1346 1347 switch (typemask) { 1348 case VMEM_ALLOC: 1349 return vm->vm_inuse; 1350 case VMEM_FREE: 1351 return vm->vm_size - vm->vm_inuse; 1352 case VMEM_FREE|VMEM_ALLOC: 1353 return vm->vm_size; 1354 default: 1355 panic("vmem_size"); 1356 } 1357 } 1358 1359 /* ---- rehash */ 1360 1361 #if defined(_KERNEL) 1362 static struct callout vmem_rehash_ch; 1363 static int vmem_rehash_interval; 1364 static struct workqueue *vmem_rehash_wq; 1365 static struct work vmem_rehash_wk; 1366 1367 static void 1368 vmem_rehash_all(struct work *wk, void *dummy) 1369 { 1370 vmem_t *vm; 1371 1372 KASSERT(wk == &vmem_rehash_wk); 1373 mutex_enter(&vmem_list_lock); 1374 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1375 size_t desired; 1376 size_t current; 1377 1378 if (!VMEM_TRYLOCK(vm)) { 1379 continue; 1380 } 1381 desired = vm->vm_nbusytag; 1382 current = vm->vm_hashsize; 1383 VMEM_UNLOCK(vm); 1384 1385 if (desired > VMEM_HASHSIZE_MAX) { 1386 desired = VMEM_HASHSIZE_MAX; 1387 } else if (desired < VMEM_HASHSIZE_MIN) { 1388 desired = VMEM_HASHSIZE_MIN; 1389 } 1390 if (desired > current * 2 || desired * 2 < current) { 1391 vmem_rehash(vm, desired, VM_NOSLEEP); 1392 } 1393 } 1394 mutex_exit(&vmem_list_lock); 1395 1396 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1397 } 1398 1399 static void 1400 vmem_rehash_all_kick(void *dummy) 1401 { 1402 1403 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL); 1404 } 1405 1406 void 1407 vmem_rehash_start(void) 1408 { 1409 int error; 1410 1411 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash", 1412 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE); 1413 if (error) { 1414 panic("%s: workqueue_create %d\n", __func__, error); 1415 } 1416 callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE); 1417 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL); 1418 1419 vmem_rehash_interval = hz * 10; 1420 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1421 } 1422 #endif /* defined(_KERNEL) */ 1423 1424 /* ---- debug */ 1425 1426 #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) 1427 1428 static void bt_dump(const bt_t *, void (*)(const char *, ...)); 1429 1430 static const char * 1431 bt_type_string(int type) 1432 { 1433 static const char * const table[] = { 1434 [BT_TYPE_BUSY] = "busy", 1435 [BT_TYPE_FREE] = "free", 1436 [BT_TYPE_SPAN] = "span", 1437 [BT_TYPE_SPAN_STATIC] = "static span", 1438 }; 1439 1440 if (type >= __arraycount(table)) { 1441 return "BOGUS"; 1442 } 1443 return table[type]; 1444 } 1445 1446 static void 1447 bt_dump(const bt_t *bt, void (*pr)(const char *, ...)) 1448 { 1449 1450 (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n", 1451 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size, 1452 bt->bt_type, bt_type_string(bt->bt_type)); 1453 } 1454 1455 static void 1456 vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...)) 1457 { 1458 const bt_t *bt; 1459 int i; 1460 1461 (*pr)("vmem %p '%s'\n", vm, vm->vm_name); 1462 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1463 bt_dump(bt, pr); 1464 } 1465 1466 for (i = 0; i < VMEM_MAXORDER; i++) { 1467 const struct vmem_freelist *fl = &vm->vm_freelist[i]; 1468 1469 if (LIST_EMPTY(fl)) { 1470 continue; 1471 } 1472 
1473 (*pr)("freelist[%d]\n", i); 1474 LIST_FOREACH(bt, fl, bt_freelist) { 1475 bt_dump(bt, pr); 1476 } 1477 } 1478 } 1479 1480 #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */ 1481 1482 #if defined(DDB) 1483 static bt_t * 1484 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr) 1485 { 1486 bt_t *bt; 1487 1488 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1489 if (BT_ISSPAN_P(bt)) { 1490 continue; 1491 } 1492 if (bt->bt_start <= addr && addr <= BT_END(bt)) { 1493 return bt; 1494 } 1495 } 1496 1497 return NULL; 1498 } 1499 1500 void 1501 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 1502 { 1503 vmem_t *vm; 1504 1505 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1506 bt_t *bt; 1507 1508 bt = vmem_whatis_lookup(vm, addr); 1509 if (bt == NULL) { 1510 continue; 1511 } 1512 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n", 1513 (void *)addr, (void *)bt->bt_start, 1514 (size_t)(addr - bt->bt_start), vm->vm_name, 1515 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free"); 1516 } 1517 } 1518 1519 void 1520 vmem_printall(const char *modif, void (*pr)(const char *, ...)) 1521 { 1522 const vmem_t *vm; 1523 1524 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1525 vmem_dump(vm, pr); 1526 } 1527 } 1528 1529 void 1530 vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...)) 1531 { 1532 const vmem_t *vm = (const void *)addr; 1533 1534 vmem_dump(vm, pr); 1535 } 1536 #endif /* defined(DDB) */ 1537 1538 #if defined(_KERNEL) 1539 #define vmem_printf printf 1540 #else 1541 #include <stdio.h> 1542 #include <stdarg.h> 1543 1544 static void 1545 vmem_printf(const char *fmt, ...) 1546 { 1547 va_list ap; 1548 va_start(ap, fmt); 1549 vprintf(fmt, ap); 1550 va_end(ap); 1551 } 1552 #endif 1553 1554 #if defined(VMEM_SANITY) 1555 1556 static bool 1557 vmem_check_sanity(vmem_t *vm) 1558 { 1559 const bt_t *bt, *bt2; 1560 1561 KASSERT(vm != NULL); 1562 1563 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1564 if (bt->bt_start > BT_END(bt)) { 1565 printf("corrupted tag\n"); 1566 bt_dump(bt, vmem_printf); 1567 return false; 1568 } 1569 } 1570 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1571 CIRCLEQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1572 if (bt == bt2) { 1573 continue; 1574 } 1575 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1576 continue; 1577 } 1578 if (bt->bt_start <= BT_END(bt2) && 1579 bt2->bt_start <= BT_END(bt)) { 1580 printf("overwrapped tags\n"); 1581 bt_dump(bt, vmem_printf); 1582 bt_dump(bt2, vmem_printf); 1583 return false; 1584 } 1585 } 1586 } 1587 1588 return true; 1589 } 1590 1591 static void 1592 vmem_check(vmem_t *vm) 1593 { 1594 1595 if (!vmem_check_sanity(vm)) { 1596 panic("insanity vmem %p", vm); 1597 } 1598 } 1599 1600 #endif /* defined(VMEM_SANITY) */ 1601 1602 #if defined(UNITTEST) 1603 int 1604 main(void) 1605 { 1606 int rc; 1607 vmem_t *vm; 1608 vmem_addr_t p; 1609 struct reg { 1610 vmem_addr_t p; 1611 vmem_size_t sz; 1612 bool x; 1613 } *reg = NULL; 1614 int nreg = 0; 1615 int nalloc = 0; 1616 int nfree = 0; 1617 vmem_size_t total = 0; 1618 #if 1 1619 vm_flag_t strat = VM_INSTANTFIT; 1620 #else 1621 vm_flag_t strat = VM_BESTFIT; 1622 #endif 1623 1624 vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP, 1625 #ifdef _KERNEL 1626 IPL_NONE 1627 #else 1628 0 1629 #endif 1630 ); 1631 if (vm == NULL) { 1632 printf("vmem_create\n"); 1633 exit(EXIT_FAILURE); 1634 } 1635 vmem_dump(vm, vmem_printf); 1636 1637 rc = vmem_add(vm, 0, 50, VM_SLEEP); 1638 assert(rc == 0); 1639 rc = vmem_add(vm, 100, 200, VM_SLEEP); 1640 assert(rc == 0); 1641 rc = 
vmem_add(vm, 2000, 1, VM_SLEEP); 1642 assert(rc == 0); 1643 rc = vmem_add(vm, 40000, 65536, VM_SLEEP); 1644 assert(rc == 0); 1645 rc = vmem_add(vm, 10000, 10000, VM_SLEEP); 1646 assert(rc == 0); 1647 rc = vmem_add(vm, 500, 1000, VM_SLEEP); 1648 assert(rc == 0); 1649 rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP); 1650 assert(rc == 0); 1651 rc = vmem_xalloc(vm, 0x101, 0, 0, 0, 1652 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1653 assert(rc != 0); 1654 rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p); 1655 assert(rc == 0 && p == 0); 1656 vmem_xfree(vm, p, 50); 1657 rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p); 1658 assert(rc == 0 && p == 0); 1659 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1660 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p); 1661 assert(rc != 0); 1662 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1663 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p); 1664 assert(rc != 0); 1665 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1666 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1667 assert(rc == 0); 1668 vmem_dump(vm, vmem_printf); 1669 for (;;) { 1670 struct reg *r; 1671 int t = rand() % 100; 1672 1673 if (t > 45) { 1674 /* alloc */ 1675 vmem_size_t sz = rand() % 500 + 1; 1676 bool x; 1677 vmem_size_t align, phase, nocross; 1678 vmem_addr_t minaddr, maxaddr; 1679 1680 if (t > 70) { 1681 x = true; 1682 /* XXX */ 1683 align = 1 << (rand() % 15); 1684 phase = rand() % 65536; 1685 nocross = 1 << (rand() % 15); 1686 if (align <= phase) { 1687 phase = 0; 1688 } 1689 if (VMEM_CROSS_P(phase, phase + sz - 1, 1690 nocross)) { 1691 nocross = 0; 1692 } 1693 do { 1694 minaddr = rand() % 50000; 1695 maxaddr = rand() % 70000; 1696 } while (minaddr > maxaddr); 1697 printf("=== xalloc %" PRIu64 1698 " align=%" PRIu64 ", phase=%" PRIu64 1699 ", nocross=%" PRIu64 ", min=%" PRIu64 1700 ", max=%" PRIu64 "\n", 1701 (uint64_t)sz, 1702 (uint64_t)align, 1703 (uint64_t)phase, 1704 (uint64_t)nocross, 1705 (uint64_t)minaddr, 1706 (uint64_t)maxaddr); 1707 rc = vmem_xalloc(vm, sz, align, phase, nocross, 1708 minaddr, maxaddr, strat|VM_SLEEP, &p); 1709 } else { 1710 x = false; 1711 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz); 1712 rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p); 1713 } 1714 printf("-> %" PRIu64 "\n", (uint64_t)p); 1715 vmem_dump(vm, vmem_printf); 1716 if (rc != 0) { 1717 if (x) { 1718 continue; 1719 } 1720 break; 1721 } 1722 nreg++; 1723 reg = realloc(reg, sizeof(*reg) * nreg); 1724 r = ®[nreg - 1]; 1725 r->p = p; 1726 r->sz = sz; 1727 r->x = x; 1728 total += sz; 1729 nalloc++; 1730 } else if (nreg != 0) { 1731 /* free */ 1732 r = ®[rand() % nreg]; 1733 printf("=== free %" PRIu64 ", %" PRIu64 "\n", 1734 (uint64_t)r->p, (uint64_t)r->sz); 1735 if (r->x) { 1736 vmem_xfree(vm, r->p, r->sz); 1737 } else { 1738 vmem_free(vm, r->p, r->sz); 1739 } 1740 total -= r->sz; 1741 vmem_dump(vm, vmem_printf); 1742 *r = reg[nreg - 1]; 1743 nreg--; 1744 nfree++; 1745 } 1746 printf("total=%" PRIu64 "\n", (uint64_t)total); 1747 } 1748 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n", 1749 (uint64_t)total, nalloc, nfree); 1750 exit(EXIT_SUCCESS); 1751 } 1752 #endif /* defined(UNITTEST) */ 1753
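
/*
 * Illustrative sketch (not part of the original code): a constrained
 * allocation with vmem_xalloc.  "my_arena" stands for any arena created
 * with vmem_create(); the sizes are arbitrary.  The request below asks
 * for 0x2000 units aligned to 0x1000 (phase 0) that do not cross a
 * 0x10000 boundary, anywhere in the arena:
 *
 *	vmem_addr_t va;
 *	int error;
 *
 *	error = vmem_xalloc(my_arena, 0x2000, 0x1000, 0, 0x10000,
 *	    VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_SLEEP | VM_BESTFIT, &va);
 *	if (error == 0)
 *		vmem_xfree(my_arena, va, 0x2000);
 */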