1 /* $NetBSD: subr_vmem.c,v 1.73 2012/03/04 14:28:49 para Exp $ */ 2 3 /*- 4 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * reference: 31 * - Magazines and Vmem: Extending the Slab Allocator 32 * to Many CPUs and Arbitrary Resources 33 * http://www.usenix.org/event/usenix01/bonwick.html 34 */ 35 36 #include <sys/cdefs.h> 37 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.73 2012/03/04 14:28:49 para Exp $"); 38 39 #if defined(_KERNEL) 40 #include "opt_ddb.h" 41 #define QCACHE 42 #endif /* defined(_KERNEL) */ 43 44 #include <sys/param.h> 45 #include <sys/hash.h> 46 #include <sys/queue.h> 47 #include <sys/bitops.h> 48 49 #if defined(_KERNEL) 50 #include <sys/systm.h> 51 #include <sys/kernel.h> /* hz */ 52 #include <sys/callout.h> 53 #include <sys/kmem.h> 54 #include <sys/pool.h> 55 #include <sys/vmem.h> 56 #include <sys/workqueue.h> 57 #include <sys/atomic.h> 58 #include <uvm/uvm.h> 59 #include <uvm/uvm_extern.h> 60 #include <uvm/uvm_km.h> 61 #include <uvm/uvm_page.h> 62 #include <uvm/uvm_pdaemon.h> 63 #else /* defined(_KERNEL) */ 64 #include "../sys/vmem.h" 65 #endif /* defined(_KERNEL) */ 66 67 68 #if defined(_KERNEL) 69 #include <sys/evcnt.h> 70 #define VMEM_EVCNT_DEFINE(name) \ 71 struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 72 "vmemev", #name); \ 73 EVCNT_ATTACH_STATIC(vmem_evcnt_##name); 74 #define VMEM_EVCNT_INCR(ev) vmem_evcnt_##ev.ev_count++ 75 #define VMEM_EVCNT_DECR(ev) vmem_evcnt_##ev.ev_count-- 76 77 VMEM_EVCNT_DEFINE(bt_pages) 78 VMEM_EVCNT_DEFINE(bt_count) 79 VMEM_EVCNT_DEFINE(bt_inuse) 80 81 #define LOCK_DECL(name) \ 82 kmutex_t name; char lockpad[COHERENCY_UNIT - sizeof(kmutex_t)] 83 84 #define CONDVAR_DECL(name) \ 85 kcondvar_t name; 86 87 #else /* defined(_KERNEL) */ 88 #include <stdio.h> 89 #include <errno.h> 90 #include <assert.h> 91 #include <stdlib.h> 92 #include <string.h> 93 94 #define VMEM_EVCNT_INCR(ev) /* nothing */ 95 #define VMEM_EVCNT_DECR(ev) /* nothing */ 96 97 #define UNITTEST 98 #define KASSERT(a) assert(a) 99 #define LOCK_DECL(name) /* nothing */ 100 #define CONDVAR_DECL(name) /* nothing */ 101 #define VMEM_CONDVAR_INIT(vm, wchan) /* nothing */ 102 #define VMEM_CONDVAR_BROADCAST(vm) /* 
nothing */ 103 #define mutex_init(a, b, c) /* nothing */ 104 #define mutex_destroy(a) /* nothing */ 105 #define mutex_enter(a) /* nothing */ 106 #define mutex_tryenter(a) true 107 #define mutex_exit(a) /* nothing */ 108 #define mutex_owned(a) /* nothing */ 109 #define ASSERT_SLEEPABLE() /* nothing */ 110 #define panic(...) printf(__VA_ARGS__); abort() 111 #endif /* defined(_KERNEL) */ 112 113 struct vmem; 114 struct vmem_btag; 115 116 #if defined(VMEM_SANITY) 117 static void vmem_check(vmem_t *); 118 #else /* defined(VMEM_SANITY) */ 119 #define vmem_check(vm) /* nothing */ 120 #endif /* defined(VMEM_SANITY) */ 121 122 #define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT) 123 124 #define VMEM_HASHSIZE_MIN 1 /* XXX */ 125 #define VMEM_HASHSIZE_MAX 65536 /* XXX */ 126 #define VMEM_HASHSIZE_INIT 1 127 128 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT) 129 130 CIRCLEQ_HEAD(vmem_seglist, vmem_btag); 131 LIST_HEAD(vmem_freelist, vmem_btag); 132 LIST_HEAD(vmem_hashlist, vmem_btag); 133 134 #if defined(QCACHE) 135 #define VMEM_QCACHE_IDX_MAX 32 136 137 #define QC_NAME_MAX 16 138 139 struct qcache { 140 pool_cache_t qc_cache; 141 vmem_t *qc_vmem; 142 char qc_name[QC_NAME_MAX]; 143 }; 144 typedef struct qcache qcache_t; 145 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache)) 146 #endif /* defined(QCACHE) */ 147 148 #define VMEM_NAME_MAX 16 149 150 /* vmem arena */ 151 struct vmem { 152 CONDVAR_DECL(vm_cv); 153 LOCK_DECL(vm_lock); 154 vm_flag_t vm_flags; 155 vmem_import_t *vm_importfn; 156 vmem_release_t *vm_releasefn; 157 size_t vm_nfreetags; 158 LIST_HEAD(, vmem_btag) vm_freetags; 159 void *vm_arg; 160 struct vmem_seglist vm_seglist; 161 struct vmem_freelist vm_freelist[VMEM_MAXORDER]; 162 size_t vm_hashsize; 163 size_t vm_nbusytag; 164 struct vmem_hashlist *vm_hashlist; 165 struct vmem_hashlist vm_hash0; 166 size_t vm_quantum_mask; 167 int vm_quantum_shift; 168 size_t vm_size; 169 size_t vm_inuse; 170 char vm_name[VMEM_NAME_MAX+1]; 171 LIST_ENTRY(vmem) vm_alllist; 172 173 #if defined(QCACHE) 174 /* quantum cache */ 175 size_t vm_qcache_max; 176 struct pool_allocator vm_qcache_allocator; 177 qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX]; 178 qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX]; 179 #endif /* defined(QCACHE) */ 180 }; 181 182 #define VMEM_LOCK(vm) mutex_enter(&vm->vm_lock) 183 #define VMEM_TRYLOCK(vm) mutex_tryenter(&vm->vm_lock) 184 #define VMEM_UNLOCK(vm) mutex_exit(&vm->vm_lock) 185 #define VMEM_LOCK_INIT(vm, ipl) mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl) 186 #define VMEM_LOCK_DESTROY(vm) mutex_destroy(&vm->vm_lock) 187 #define VMEM_ASSERT_LOCKED(vm) KASSERT(mutex_owned(&vm->vm_lock)) 188 189 #if defined(_KERNEL) 190 #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan) 191 #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv) 192 #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock) 193 #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv) 194 #endif /* defined(_KERNEL) */ 195 196 /* boundary tag */ 197 struct vmem_btag { 198 CIRCLEQ_ENTRY(vmem_btag) bt_seglist; 199 union { 200 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */ 201 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */ 202 } bt_u; 203 #define bt_hashlist bt_u.u_hashlist 204 #define bt_freelist bt_u.u_freelist 205 vmem_addr_t bt_start; 206 vmem_size_t bt_size; 207 int bt_type; 208 }; 209 210 #define BT_TYPE_SPAN 1 211 #define BT_TYPE_SPAN_STATIC 2 212 #define BT_TYPE_FREE 3 213 #define BT_TYPE_BUSY 4 214 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC) 215 216 #define 
BT_END(bt) ((bt)->bt_start + (bt)->bt_size - 1) 217 218 typedef struct vmem_btag bt_t; 219 220 #if defined(_KERNEL) 221 static kmutex_t vmem_list_lock; 222 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); 223 #endif /* defined(_KERNEL) */ 224 225 /* ---- misc */ 226 227 #define VMEM_ALIGNUP(addr, align) \ 228 (-(-(addr) & -(align))) 229 230 #define VMEM_CROSS_P(addr1, addr2, boundary) \ 231 ((((addr1) ^ (addr2)) & -(boundary)) != 0) 232 233 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order)) 234 #define SIZE2ORDER(size) ((int)ilog2(size)) 235 236 #if !defined(_KERNEL) 237 #define xmalloc(sz, flags) malloc(sz) 238 #define xfree(p, sz) free(p) 239 #define bt_alloc(vm, flags) malloc(sizeof(bt_t)) 240 #define bt_free(vm, bt) free(bt) 241 #else /* defined(_KERNEL) */ 242 243 #define xmalloc(sz, flags) \ 244 kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP); 245 #define xfree(p, sz) kmem_free(p, sz); 246 247 #define BT_MINRESERVE 6 248 #define BT_MAXFREE 64 249 #define STATIC_VMEM_COUNT 5 250 #define STATIC_BT_COUNT 200 251 /* must be equal or greater then qcache multiplier for kmem_va_arena */ 252 #define STATIC_QC_POOL_COUNT 8 253 254 static struct vmem static_vmems[STATIC_VMEM_COUNT]; 255 static int static_vmem_count = STATIC_VMEM_COUNT; 256 257 static struct vmem_btag static_bts[STATIC_BT_COUNT]; 258 static int static_bt_count = STATIC_BT_COUNT; 259 260 static struct pool_cache static_qc_pools[STATIC_QC_POOL_COUNT]; 261 static int static_qc_pool_count = STATIC_QC_POOL_COUNT; 262 263 vmem_t *kmem_va_meta_arena; 264 vmem_t *kmem_meta_arena; 265 266 static kmutex_t vmem_btag_lock; 267 static LIST_HEAD(, vmem_btag) vmem_btag_freelist; 268 static size_t vmem_btag_freelist_count = 0; 269 static size_t vmem_btag_count = STATIC_BT_COUNT; 270 271 /* ---- boundary tag */ 272 273 #define BT_PER_PAGE (PAGE_SIZE / sizeof(bt_t)) 274 275 static int bt_refill(vmem_t *vm, vm_flag_t flags); 276 277 static int 278 bt_refillglobal(vm_flag_t flags) 279 { 280 vmem_addr_t va; 281 bt_t *btp; 282 bt_t *bt; 283 int i; 284 285 mutex_enter(&vmem_btag_lock); 286 if (vmem_btag_freelist_count > (BT_MINRESERVE * 16)) { 287 mutex_exit(&vmem_btag_lock); 288 return 0; 289 } 290 291 if (vmem_alloc(kmem_meta_arena, PAGE_SIZE, 292 (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va) != 0) { 293 mutex_exit(&vmem_btag_lock); 294 return ENOMEM; 295 } 296 VMEM_EVCNT_INCR(bt_pages); 297 298 btp = (void *) va; 299 for (i = 0; i < (BT_PER_PAGE); i++) { 300 bt = btp; 301 memset(bt, 0, sizeof(*bt)); 302 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, 303 bt_freelist); 304 vmem_btag_freelist_count++; 305 vmem_btag_count++; 306 VMEM_EVCNT_INCR(bt_count); 307 btp++; 308 } 309 mutex_exit(&vmem_btag_lock); 310 311 bt_refill(kmem_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT); 312 bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT); 313 bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT); 314 315 return 0; 316 } 317 318 static int 319 bt_refill(vmem_t *vm, vm_flag_t flags) 320 { 321 bt_t *bt; 322 323 bt_refillglobal(flags); 324 325 VMEM_LOCK(vm); 326 mutex_enter(&vmem_btag_lock); 327 while (!LIST_EMPTY(&vmem_btag_freelist) && 328 vm->vm_nfreetags < (BT_MINRESERVE * 2)) { 329 bt = LIST_FIRST(&vmem_btag_freelist); 330 LIST_REMOVE(bt, bt_freelist); 331 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 332 vm->vm_nfreetags++; 333 vmem_btag_freelist_count--; 334 } 335 mutex_exit(&vmem_btag_lock); 336 337 if (vm->vm_nfreetags == 0) { 338 VMEM_UNLOCK(vm); 339 return ENOMEM; 340 } 
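	/*
	 * at this point at least one boundary tag has been reserved on this
	 * arena's private free-tag list, so the caller can make progress;
	 * drop the arena lock and report success.
	 */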
341 VMEM_UNLOCK(vm); 342 343 return 0; 344 } 345 346 static inline bt_t * 347 bt_alloc(vmem_t *vm, vm_flag_t flags) 348 { 349 bt_t *bt; 350 again: 351 VMEM_LOCK(vm); 352 if (vm->vm_nfreetags < BT_MINRESERVE && 353 (flags & VM_POPULATING) == 0) { 354 VMEM_UNLOCK(vm); 355 if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) { 356 return NULL; 357 } 358 goto again; 359 } 360 bt = LIST_FIRST(&vm->vm_freetags); 361 LIST_REMOVE(bt, bt_freelist); 362 vm->vm_nfreetags--; 363 VMEM_UNLOCK(vm); 364 VMEM_EVCNT_INCR(bt_inuse); 365 366 return bt; 367 } 368 369 static inline void 370 bt_free(vmem_t *vm, bt_t *bt) 371 { 372 373 VMEM_LOCK(vm); 374 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 375 vm->vm_nfreetags++; 376 while (vm->vm_nfreetags > BT_MAXFREE) { 377 bt = LIST_FIRST(&vm->vm_freetags); 378 LIST_REMOVE(bt, bt_freelist); 379 vm->vm_nfreetags--; 380 mutex_enter(&vmem_btag_lock); 381 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 382 vmem_btag_freelist_count++; 383 mutex_exit(&vmem_btag_lock); 384 } 385 VMEM_UNLOCK(vm); 386 VMEM_EVCNT_DECR(bt_inuse); 387 } 388 389 #endif /* defined(_KERNEL) */ 390 391 /* 392 * freelist[0] ... [1, 1] 393 * freelist[1] ... [2, 3] 394 * freelist[2] ... [4, 7] 395 * freelist[3] ... [8, 15] 396 * : 397 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1] 398 * : 399 */ 400 401 static struct vmem_freelist * 402 bt_freehead_tofree(vmem_t *vm, vmem_size_t size) 403 { 404 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 405 const int idx = SIZE2ORDER(qsize); 406 407 KASSERT(size != 0 && qsize != 0); 408 KASSERT((size & vm->vm_quantum_mask) == 0); 409 KASSERT(idx >= 0); 410 KASSERT(idx < VMEM_MAXORDER); 411 412 return &vm->vm_freelist[idx]; 413 } 414 415 /* 416 * bt_freehead_toalloc: return the freelist for the given size and allocation 417 * strategy. 418 * 419 * for VM_INSTANTFIT, return the list in which any blocks are large enough 420 * for the requested size. otherwise, return the list which can have blocks 421 * large enough for the requested size. 422 */ 423 424 static struct vmem_freelist * 425 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat) 426 { 427 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 428 int idx = SIZE2ORDER(qsize); 429 430 KASSERT(size != 0 && qsize != 0); 431 KASSERT((size & vm->vm_quantum_mask) == 0); 432 433 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) { 434 idx++; 435 /* check too large request? 
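		 * bumping the index up for VM_INSTANTFIT guarantees that
		 * every block on the chosen list is at least as large as
		 * the request, so the allocator may take the first block it
		 * finds without comparing sizes; an exact power-of-two
		 * request already has that property and keeps its own list.
		 * the KASSERTs below are the only guard against stepping
		 * past the last freelist.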
*/ 436 } 437 KASSERT(idx >= 0); 438 KASSERT(idx < VMEM_MAXORDER); 439 440 return &vm->vm_freelist[idx]; 441 } 442 443 /* ---- boundary tag hash */ 444 445 static struct vmem_hashlist * 446 bt_hashhead(vmem_t *vm, vmem_addr_t addr) 447 { 448 struct vmem_hashlist *list; 449 unsigned int hash; 450 451 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT); 452 list = &vm->vm_hashlist[hash % vm->vm_hashsize]; 453 454 return list; 455 } 456 457 static bt_t * 458 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr) 459 { 460 struct vmem_hashlist *list; 461 bt_t *bt; 462 463 list = bt_hashhead(vm, addr); 464 LIST_FOREACH(bt, list, bt_hashlist) { 465 if (bt->bt_start == addr) { 466 break; 467 } 468 } 469 470 return bt; 471 } 472 473 static void 474 bt_rembusy(vmem_t *vm, bt_t *bt) 475 { 476 477 KASSERT(vm->vm_nbusytag > 0); 478 vm->vm_inuse -= bt->bt_size; 479 vm->vm_nbusytag--; 480 LIST_REMOVE(bt, bt_hashlist); 481 } 482 483 static void 484 bt_insbusy(vmem_t *vm, bt_t *bt) 485 { 486 struct vmem_hashlist *list; 487 488 KASSERT(bt->bt_type == BT_TYPE_BUSY); 489 490 list = bt_hashhead(vm, bt->bt_start); 491 LIST_INSERT_HEAD(list, bt, bt_hashlist); 492 vm->vm_nbusytag++; 493 vm->vm_inuse += bt->bt_size; 494 } 495 496 /* ---- boundary tag list */ 497 498 static void 499 bt_remseg(vmem_t *vm, bt_t *bt) 500 { 501 502 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist); 503 } 504 505 static void 506 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev) 507 { 508 509 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist); 510 } 511 512 static void 513 bt_insseg_tail(vmem_t *vm, bt_t *bt) 514 { 515 516 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist); 517 } 518 519 static void 520 bt_remfree(vmem_t *vm, bt_t *bt) 521 { 522 523 KASSERT(bt->bt_type == BT_TYPE_FREE); 524 525 LIST_REMOVE(bt, bt_freelist); 526 } 527 528 static void 529 bt_insfree(vmem_t *vm, bt_t *bt) 530 { 531 struct vmem_freelist *list; 532 533 list = bt_freehead_tofree(vm, bt->bt_size); 534 LIST_INSERT_HEAD(list, bt, bt_freelist); 535 } 536 537 /* ---- vmem internal functions */ 538 539 #if defined(QCACHE) 540 static inline vm_flag_t 541 prf_to_vmf(int prflags) 542 { 543 vm_flag_t vmflags; 544 545 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0); 546 if ((prflags & PR_WAITOK) != 0) { 547 vmflags = VM_SLEEP; 548 } else { 549 vmflags = VM_NOSLEEP; 550 } 551 return vmflags; 552 } 553 554 static inline int 555 vmf_to_prf(vm_flag_t vmflags) 556 { 557 int prflags; 558 559 if ((vmflags & VM_SLEEP) != 0) { 560 prflags = PR_WAITOK; 561 } else { 562 prflags = PR_NOWAIT; 563 } 564 return prflags; 565 } 566 567 static size_t 568 qc_poolpage_size(size_t qcache_max) 569 { 570 int i; 571 572 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) { 573 /* nothing */ 574 } 575 return ORDER2SIZE(i); 576 } 577 578 static void * 579 qc_poolpage_alloc(struct pool *pool, int prflags) 580 { 581 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 582 vmem_t *vm = qc->qc_vmem; 583 vmem_addr_t addr; 584 585 if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz, 586 prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0) 587 return NULL; 588 return (void *)addr; 589 } 590 591 static void 592 qc_poolpage_free(struct pool *pool, void *addr) 593 { 594 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 595 vmem_t *vm = qc->qc_vmem; 596 597 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz); 598 } 599 600 static void 601 qc_init(vmem_t *vm, size_t qcache_max, int ipl) 602 { 603 qcache_t *prevqc; 604 struct pool_allocator *pa; 605 int qcache_idx_max; 606 int i; 607 608 KASSERT((qcache_max & 
vm->vm_quantum_mask) == 0); 609 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) { 610 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift; 611 } 612 vm->vm_qcache_max = qcache_max; 613 pa = &vm->vm_qcache_allocator; 614 memset(pa, 0, sizeof(*pa)); 615 pa->pa_alloc = qc_poolpage_alloc; 616 pa->pa_free = qc_poolpage_free; 617 pa->pa_pagesz = qc_poolpage_size(qcache_max); 618 619 qcache_idx_max = qcache_max >> vm->vm_quantum_shift; 620 prevqc = NULL; 621 for (i = qcache_idx_max; i > 0; i--) { 622 qcache_t *qc = &vm->vm_qcache_store[i - 1]; 623 size_t size = i << vm->vm_quantum_shift; 624 pool_cache_t pc; 625 626 qc->qc_vmem = vm; 627 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu", 628 vm->vm_name, size); 629 630 if (vm->vm_flags & VM_BOOTSTRAP) { 631 KASSERT(static_qc_pool_count > 0); 632 pc = &static_qc_pools[--static_qc_pool_count]; 633 pool_cache_bootstrap(pc, size, 634 ORDER2SIZE(vm->vm_quantum_shift), 0, 635 PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */, 636 qc->qc_name, pa, ipl, NULL, NULL, NULL); 637 } else { 638 pc = pool_cache_init(size, 639 ORDER2SIZE(vm->vm_quantum_shift), 0, 640 PR_NOALIGN | PR_NOTOUCH /* XXX */, 641 qc->qc_name, pa, ipl, NULL, NULL, NULL); 642 } 643 qc->qc_cache = pc; 644 KASSERT(qc->qc_cache != NULL); /* XXX */ 645 if (prevqc != NULL && 646 qc->qc_cache->pc_pool.pr_itemsperpage == 647 prevqc->qc_cache->pc_pool.pr_itemsperpage) { 648 if (vm->vm_flags & VM_BOOTSTRAP) { 649 pool_cache_bootstrap_destroy(pc); 650 //static_qc_pool_count++; 651 } else { 652 pool_cache_destroy(qc->qc_cache); 653 } 654 vm->vm_qcache[i - 1] = prevqc; 655 continue; 656 } 657 qc->qc_cache->pc_pool.pr_qcache = qc; 658 vm->vm_qcache[i - 1] = qc; 659 prevqc = qc; 660 } 661 } 662 663 static void 664 qc_destroy(vmem_t *vm) 665 { 666 const qcache_t *prevqc; 667 int i; 668 int qcache_idx_max; 669 670 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift; 671 prevqc = NULL; 672 for (i = 0; i < qcache_idx_max; i++) { 673 qcache_t *qc = vm->vm_qcache[i]; 674 675 if (prevqc == qc) { 676 continue; 677 } 678 if (vm->vm_flags & VM_BOOTSTRAP) { 679 pool_cache_bootstrap_destroy(qc->qc_cache); 680 } else { 681 pool_cache_destroy(qc->qc_cache); 682 } 683 prevqc = qc; 684 } 685 } 686 #endif 687 688 #if defined(_KERNEL) 689 void 690 vmem_bootstrap(void) 691 { 692 693 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM); 694 mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM); 695 696 while (static_bt_count-- > 0) { 697 bt_t *bt = &static_bts[static_bt_count]; 698 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 699 VMEM_EVCNT_INCR(bt_count); 700 vmem_btag_freelist_count++; 701 } 702 } 703 704 void 705 vmem_init(vmem_t *vm) 706 { 707 708 kmem_va_meta_arena = vmem_create("vmem-va", 0, 0, PAGE_SIZE, 709 vmem_alloc, vmem_free, vm, 710 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT, 711 IPL_VM); 712 713 kmem_meta_arena = vmem_create("vmem-meta", 0, 0, PAGE_SIZE, 714 uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena, 715 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); 716 } 717 #endif /* defined(_KERNEL) */ 718 719 static int 720 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags, 721 int spanbttype) 722 { 723 bt_t *btspan; 724 bt_t *btfree; 725 726 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 727 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 728 KASSERT(spanbttype == BT_TYPE_SPAN || 729 spanbttype == BT_TYPE_SPAN_STATIC); 730 731 btspan = bt_alloc(vm, flags); 732 if (btspan == NULL) { 733 return ENOMEM; 734 } 735 btfree = bt_alloc(vm, flags); 736 if 
(btfree == NULL) { 737 bt_free(vm, btspan); 738 return ENOMEM; 739 } 740 741 btspan->bt_type = spanbttype; 742 btspan->bt_start = addr; 743 btspan->bt_size = size; 744 745 btfree->bt_type = BT_TYPE_FREE; 746 btfree->bt_start = addr; 747 btfree->bt_size = size; 748 749 VMEM_LOCK(vm); 750 bt_insseg_tail(vm, btspan); 751 bt_insseg(vm, btfree, btspan); 752 bt_insfree(vm, btfree); 753 vm->vm_size += size; 754 VMEM_UNLOCK(vm); 755 756 return 0; 757 } 758 759 static void 760 vmem_destroy1(vmem_t *vm) 761 { 762 763 #if defined(QCACHE) 764 qc_destroy(vm); 765 #endif /* defined(QCACHE) */ 766 if (vm->vm_hashlist != NULL) { 767 int i; 768 769 for (i = 0; i < vm->vm_hashsize; i++) { 770 bt_t *bt; 771 772 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) { 773 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC); 774 bt_free(vm, bt); 775 } 776 } 777 if (vm->vm_hashlist != &vm->vm_hash0) { 778 xfree(vm->vm_hashlist, 779 sizeof(struct vmem_hashlist *) * vm->vm_hashsize); 780 } 781 } 782 783 while (vm->vm_nfreetags > 0) { 784 bt_t *bt = LIST_FIRST(&vm->vm_freetags); 785 LIST_REMOVE(bt, bt_freelist); 786 vm->vm_nfreetags--; 787 mutex_enter(&vmem_btag_lock); 788 #if defined (_KERNEL) 789 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 790 vmem_btag_freelist_count++; 791 #endif /* defined(_KERNEL) */ 792 mutex_exit(&vmem_btag_lock); 793 } 794 795 VMEM_LOCK_DESTROY(vm); 796 xfree(vm, sizeof(*vm)); 797 } 798 799 static int 800 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags) 801 { 802 vmem_addr_t addr; 803 int rc; 804 805 if (vm->vm_importfn == NULL) { 806 return EINVAL; 807 } 808 809 if (vm->vm_flags & VM_LARGEIMPORT) { 810 size *= 8; 811 } 812 813 if (vm->vm_flags & VM_XIMPORT) { 814 rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size, 815 &size, flags, &addr); 816 } else { 817 rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr); 818 } 819 if (rc) { 820 return ENOMEM; 821 } 822 823 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) { 824 (*vm->vm_releasefn)(vm->vm_arg, addr, size); 825 return ENOMEM; 826 } 827 828 return 0; 829 } 830 831 static int 832 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags) 833 { 834 bt_t *bt; 835 int i; 836 struct vmem_hashlist *newhashlist; 837 struct vmem_hashlist *oldhashlist; 838 size_t oldhashsize; 839 840 KASSERT(newhashsize > 0); 841 842 newhashlist = 843 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags); 844 if (newhashlist == NULL) { 845 return ENOMEM; 846 } 847 for (i = 0; i < newhashsize; i++) { 848 LIST_INIT(&newhashlist[i]); 849 } 850 851 if (!VMEM_TRYLOCK(vm)) { 852 xfree(newhashlist, 853 sizeof(struct vmem_hashlist *) * newhashsize); 854 return EBUSY; 855 } 856 oldhashlist = vm->vm_hashlist; 857 oldhashsize = vm->vm_hashsize; 858 vm->vm_hashlist = newhashlist; 859 vm->vm_hashsize = newhashsize; 860 if (oldhashlist == NULL) { 861 VMEM_UNLOCK(vm); 862 return 0; 863 } 864 for (i = 0; i < oldhashsize; i++) { 865 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) { 866 bt_rembusy(vm, bt); /* XXX */ 867 bt_insbusy(vm, bt); 868 } 869 } 870 VMEM_UNLOCK(vm); 871 872 if (oldhashlist != &vm->vm_hash0) { 873 xfree(oldhashlist, 874 sizeof(struct vmem_hashlist *) * oldhashsize); 875 } 876 877 return 0; 878 } 879 880 /* 881 * vmem_fit: check if a bt can satisfy the given restrictions. 882 * 883 * it's a caller's responsibility to ensure the region is big enough 884 * before calling us. 
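 *
 * the remaining arguments express the caller's constraints: "align"
 * and "phase" ask for a start address congruent to phase modulo align,
 * "nocross" (if non-zero) forbids the region from crossing a
 * nocross-aligned boundary, and minaddr/maxaddr bound the usable part
 * of the tag.  on success the chosen start address is written to
 * *addrp.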
885 */ 886 887 static int 888 vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align, 889 vmem_size_t phase, vmem_size_t nocross, 890 vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp) 891 { 892 vmem_addr_t start; 893 vmem_addr_t end; 894 895 KASSERT(size > 0); 896 KASSERT(bt->bt_size >= size); /* caller's responsibility */ 897 898 /* 899 * XXX assumption: vmem_addr_t and vmem_size_t are 900 * unsigned integers of the same size. 901 */ 902 903 start = bt->bt_start; 904 if (start < minaddr) { 905 start = minaddr; 906 } 907 end = BT_END(bt); 908 if (end > maxaddr) { 909 end = maxaddr; 910 } 911 if (start > end) { 912 return ENOMEM; 913 } 914 915 start = VMEM_ALIGNUP(start - phase, align) + phase; 916 if (start < bt->bt_start) { 917 start += align; 918 } 919 if (VMEM_CROSS_P(start, start + size - 1, nocross)) { 920 KASSERT(align < nocross); 921 start = VMEM_ALIGNUP(start - phase, nocross) + phase; 922 } 923 if (start <= end && end - start >= size - 1) { 924 KASSERT((start & (align - 1)) == phase); 925 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross)); 926 KASSERT(minaddr <= start); 927 KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr); 928 KASSERT(bt->bt_start <= start); 929 KASSERT(BT_END(bt) - start >= size - 1); 930 *addrp = start; 931 return 0; 932 } 933 return ENOMEM; 934 } 935 936 937 /* 938 * vmem_create_internal: creates a vmem arena. 939 */ 940 941 static vmem_t * 942 vmem_create_internal(const char *name, vmem_addr_t base, vmem_size_t size, 943 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, 944 void *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 945 { 946 vmem_t *vm = NULL; 947 int i; 948 949 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 950 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 951 KASSERT(quantum > 0); 952 953 if (flags & VM_BOOTSTRAP) { 954 #if defined(_KERNEL) 955 KASSERT(static_vmem_count > 0); 956 vm = &static_vmems[--static_vmem_count]; 957 #endif /* defined(_KERNEL) */ 958 } else { 959 vm = xmalloc(sizeof(*vm), flags); 960 } 961 if (vm == NULL) { 962 return NULL; 963 } 964 965 VMEM_CONDVAR_INIT(vm, "vmem"); 966 VMEM_LOCK_INIT(vm, ipl); 967 vm->vm_flags = flags; 968 vm->vm_nfreetags = 0; 969 LIST_INIT(&vm->vm_freetags); 970 strlcpy(vm->vm_name, name, sizeof(vm->vm_name)); 971 vm->vm_quantum_mask = quantum - 1; 972 vm->vm_quantum_shift = SIZE2ORDER(quantum); 973 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum); 974 vm->vm_importfn = importfn; 975 vm->vm_releasefn = releasefn; 976 vm->vm_arg = arg; 977 vm->vm_nbusytag = 0; 978 vm->vm_size = 0; 979 vm->vm_inuse = 0; 980 #if defined(QCACHE) 981 qc_init(vm, qcache_max, ipl); 982 #endif /* defined(QCACHE) */ 983 984 CIRCLEQ_INIT(&vm->vm_seglist); 985 for (i = 0; i < VMEM_MAXORDER; i++) { 986 LIST_INIT(&vm->vm_freelist[i]); 987 } 988 vm->vm_hashlist = NULL; 989 if (flags & VM_BOOTSTRAP) { 990 vm->vm_hashsize = 1; 991 vm->vm_hashlist = &vm->vm_hash0; 992 } else if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) { 993 vmem_destroy1(vm); 994 return NULL; 995 } 996 997 if (size != 0) { 998 if (vmem_add(vm, base, size, flags) != 0) { 999 vmem_destroy1(vm); 1000 return NULL; 1001 } 1002 } 1003 1004 #if defined(_KERNEL) 1005 if (flags & VM_BOOTSTRAP) { 1006 bt_refill(vm, VM_NOSLEEP); 1007 } 1008 1009 mutex_enter(&vmem_list_lock); 1010 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist); 1011 mutex_exit(&vmem_list_lock); 1012 #endif /* defined(_KERNEL) */ 1013 1014 return vm; 1015 } 1016 1017 1018 /* ---- vmem API */ 1019 1020 /* 1021 * vmem_create: create an arena.
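 *
 * => "base" and "size" describe an optional initial span; pass 0 for
 *    both and add spans later with vmem_add if desired.
 * => "quantum" is the smallest unit of allocation and must be a
 *    power of 2.
 * => "importfn" and "releasefn", called with "source" as their first
 *    argument, let the arena import spans from a backing arena on
 *    demand and return them when they become free.
 * => requests not larger than "qcache_max" are served from per-size
 *    quantum caches instead of the arena itself.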
1022 * 1023 * => must not be called from interrupt context. 1024 */ 1025 1026 vmem_t * 1027 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, 1028 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, 1029 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 1030 { 1031 1032 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1033 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1034 KASSERT((flags & (VM_XIMPORT)) == 0); 1035 1036 return vmem_create_internal(name, base, size, quantum, 1037 importfn, releasefn, source, qcache_max, flags, ipl); 1038 } 1039 1040 /* 1041 * vmem_xcreate: create an arena that takes an alternative import function. 1042 * 1043 * => must not be called from interrupt context. 1044 */ 1045 1046 vmem_t * 1047 vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size, 1048 vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn, 1049 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 1050 { 1051 1052 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1053 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1054 KASSERT((flags & (VM_XIMPORT)) == 0); 1055 1056 return vmem_create_internal(name, base, size, quantum, 1057 (vmem_import_t *)importfn, releasefn, source, 1058 qcache_max, flags | VM_XIMPORT, ipl); 1059 } 1060 1061 void 1062 vmem_destroy(vmem_t *vm) 1063 { 1064 1065 #if defined(_KERNEL) 1066 mutex_enter(&vmem_list_lock); 1067 LIST_REMOVE(vm, vm_alllist); 1068 mutex_exit(&vmem_list_lock); 1069 #endif /* defined(_KERNEL) */ 1070 1071 vmem_destroy1(vm); 1072 } 1073 1074 vmem_size_t 1075 vmem_roundup_size(vmem_t *vm, vmem_size_t size) 1076 { 1077 1078 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask; 1079 } 1080 1081 /* 1082 * vmem_alloc: 1083 * 1084 * => caller must ensure appropriate spl, 1085 * if the arena can be accessed from interrupt context. 1086 */ 1087 1088 int 1089 vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp) 1090 { 1091 const vm_flag_t strat __unused = flags & VM_FITMASK; 1092 1093 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1094 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1095 1096 KASSERT(size > 0); 1097 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1098 if ((flags & VM_SLEEP) != 0) { 1099 ASSERT_SLEEPABLE(); 1100 } 1101 1102 #if defined(QCACHE) 1103 if (size <= vm->vm_qcache_max) { 1104 void *p; 1105 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1106 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1107 1108 p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags)); 1109 if (addrp != NULL) 1110 *addrp = (vmem_addr_t)p; 1111 return (p == NULL) ?
ENOMEM : 0; 1112 } 1113 #endif /* defined(QCACHE) */ 1114 1115 return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, 1116 flags, addrp); 1117 } 1118 1119 int 1120 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align, 1121 const vmem_size_t phase, const vmem_size_t nocross, 1122 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags, 1123 vmem_addr_t *addrp) 1124 { 1125 struct vmem_freelist *list; 1126 struct vmem_freelist *first; 1127 struct vmem_freelist *end; 1128 bt_t *bt; 1129 bt_t *btnew; 1130 bt_t *btnew2; 1131 const vmem_size_t size = vmem_roundup_size(vm, size0); 1132 vm_flag_t strat = flags & VM_FITMASK; 1133 vmem_addr_t start; 1134 int rc; 1135 1136 KASSERT(size0 > 0); 1137 KASSERT(size > 0); 1138 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1139 if ((flags & VM_SLEEP) != 0) { 1140 ASSERT_SLEEPABLE(); 1141 } 1142 KASSERT((align & vm->vm_quantum_mask) == 0); 1143 KASSERT((align & (align - 1)) == 0); 1144 KASSERT((phase & vm->vm_quantum_mask) == 0); 1145 KASSERT((nocross & vm->vm_quantum_mask) == 0); 1146 KASSERT((nocross & (nocross - 1)) == 0); 1147 KASSERT((align == 0 && phase == 0) || phase < align); 1148 KASSERT(nocross == 0 || nocross >= size); 1149 KASSERT(minaddr <= maxaddr); 1150 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross)); 1151 1152 if (align == 0) { 1153 align = vm->vm_quantum_mask + 1; 1154 } 1155 1156 /* 1157 * allocate boundary tags before acquiring the vmem lock. 1158 */ 1159 btnew = bt_alloc(vm, flags); 1160 if (btnew == NULL) { 1161 return ENOMEM; 1162 } 1163 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */ 1164 if (btnew2 == NULL) { 1165 bt_free(vm, btnew); 1166 return ENOMEM; 1167 } 1168 1169 /* 1170 * choose a free block from which we allocate. 1171 */ 1172 retry_strat: 1173 first = bt_freehead_toalloc(vm, size, strat); 1174 end = &vm->vm_freelist[VMEM_MAXORDER]; 1175 retry: 1176 bt = NULL; 1177 VMEM_LOCK(vm); 1178 vmem_check(vm); 1179 if (strat == VM_INSTANTFIT) { 1180 /* 1181 * just choose the first block which satisfies our restrictions. 1182 * 1183 * note that we don't need to check the size of the blocks 1184 * because any blocks found on these list should be larger than 1185 * the given size. 1186 */ 1187 for (list = first; list < end; list++) { 1188 bt = LIST_FIRST(list); 1189 if (bt != NULL) { 1190 rc = vmem_fit(bt, size, align, phase, 1191 nocross, minaddr, maxaddr, &start); 1192 if (rc == 0) { 1193 goto gotit; 1194 } 1195 /* 1196 * don't bother to follow the bt_freelist link 1197 * here. the list can be very long and we are 1198 * told to run fast. blocks from the later free 1199 * lists are larger and have better chances to 1200 * satisfy our restrictions. 1201 */ 1202 } 1203 } 1204 } else { /* VM_BESTFIT */ 1205 /* 1206 * we assume that, for space efficiency, it's better to 1207 * allocate from a smaller block. thus we will start searching 1208 * from the lower-order list than VM_INSTANTFIT. 1209 * however, don't bother to find the smallest block in a free 1210 * list because the list can be very long. we can revisit it 1211 * if/when it turns out to be a problem. 1212 * 1213 * note that the 'first' list can contain blocks smaller than 1214 * the requested size. thus we need to check bt_size. 
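		 * in other words this is best-fit only at freelist
		 * granularity: we take the first block that fits on the
		 * lowest-order list that has one, not the globally
		 * smallest block.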
1215 */ 1216 for (list = first; list < end; list++) { 1217 LIST_FOREACH(bt, list, bt_freelist) { 1218 if (bt->bt_size >= size) { 1219 rc = vmem_fit(bt, size, align, phase, 1220 nocross, minaddr, maxaddr, &start); 1221 if (rc == 0) { 1222 goto gotit; 1223 } 1224 } 1225 } 1226 } 1227 } 1228 VMEM_UNLOCK(vm); 1229 #if 1 1230 if (strat == VM_INSTANTFIT) { 1231 strat = VM_BESTFIT; 1232 goto retry_strat; 1233 } 1234 #endif 1235 if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) { 1236 1237 /* 1238 * XXX should try to import a region large enough to 1239 * satisfy restrictions? 1240 */ 1241 1242 goto fail; 1243 } 1244 /* XXX eeek, minaddr & maxaddr not respected */ 1245 if (vmem_import(vm, size, flags) == 0) { 1246 goto retry; 1247 } 1248 /* XXX */ 1249 1250 if ((flags & VM_SLEEP) != 0) { 1251 #if defined(_KERNEL) && !defined(_RUMPKERNEL) 1252 mutex_spin_enter(&uvm_fpageqlock); 1253 uvm_kick_pdaemon(); 1254 mutex_spin_exit(&uvm_fpageqlock); 1255 #endif 1256 VMEM_LOCK(vm); 1257 VMEM_CONDVAR_WAIT(vm); 1258 VMEM_UNLOCK(vm); 1259 goto retry; 1260 } 1261 fail: 1262 bt_free(vm, btnew); 1263 bt_free(vm, btnew2); 1264 return ENOMEM; 1265 1266 gotit: 1267 KASSERT(bt->bt_type == BT_TYPE_FREE); 1268 KASSERT(bt->bt_size >= size); 1269 bt_remfree(vm, bt); 1270 vmem_check(vm); 1271 if (bt->bt_start != start) { 1272 btnew2->bt_type = BT_TYPE_FREE; 1273 btnew2->bt_start = bt->bt_start; 1274 btnew2->bt_size = start - bt->bt_start; 1275 bt->bt_start = start; 1276 bt->bt_size -= btnew2->bt_size; 1277 bt_insfree(vm, btnew2); 1278 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist)); 1279 btnew2 = NULL; 1280 vmem_check(vm); 1281 } 1282 KASSERT(bt->bt_start == start); 1283 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) { 1284 /* split */ 1285 btnew->bt_type = BT_TYPE_BUSY; 1286 btnew->bt_start = bt->bt_start; 1287 btnew->bt_size = size; 1288 bt->bt_start = bt->bt_start + size; 1289 bt->bt_size -= size; 1290 bt_insfree(vm, bt); 1291 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist)); 1292 bt_insbusy(vm, btnew); 1293 vmem_check(vm); 1294 VMEM_UNLOCK(vm); 1295 } else { 1296 bt->bt_type = BT_TYPE_BUSY; 1297 bt_insbusy(vm, bt); 1298 vmem_check(vm); 1299 VMEM_UNLOCK(vm); 1300 bt_free(vm, btnew); 1301 btnew = bt; 1302 } 1303 if (btnew2 != NULL) { 1304 bt_free(vm, btnew2); 1305 } 1306 KASSERT(btnew->bt_size >= size); 1307 btnew->bt_type = BT_TYPE_BUSY; 1308 1309 if (addrp != NULL) 1310 *addrp = btnew->bt_start; 1311 return 0; 1312 } 1313 1314 /* 1315 * vmem_free: 1316 * 1317 * => caller must ensure appropriate spl, 1318 * if the arena can be accessed from interrupt context. 
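 *
 * => "size" must match the size passed to vmem_alloc for this address;
 *    requests within the quantum cache limit go back to the per-size
 *    pool cache, anything larger is handed to vmem_xfree.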
1319 */ 1320 1321 void 1322 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1323 { 1324 1325 KASSERT(size > 0); 1326 1327 #if defined(QCACHE) 1328 if (size <= vm->vm_qcache_max) { 1329 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1330 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1331 1332 pool_cache_put(qc->qc_cache, (void *)addr); 1333 return; 1334 } 1335 #endif /* defined(QCACHE) */ 1336 1337 vmem_xfree(vm, addr, size); 1338 } 1339 1340 void 1341 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1342 { 1343 bt_t *bt; 1344 bt_t *t; 1345 LIST_HEAD(, vmem_btag) tofree; 1346 1347 LIST_INIT(&tofree); 1348 1349 KASSERT(size > 0); 1350 1351 VMEM_LOCK(vm); 1352 1353 bt = bt_lookupbusy(vm, addr); 1354 KASSERT(bt != NULL); 1355 KASSERT(bt->bt_start == addr); 1356 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) || 1357 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask); 1358 KASSERT(bt->bt_type == BT_TYPE_BUSY); 1359 bt_rembusy(vm, bt); 1360 bt->bt_type = BT_TYPE_FREE; 1361 1362 /* coalesce */ 1363 t = CIRCLEQ_NEXT(bt, bt_seglist); 1364 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1365 KASSERT(BT_END(bt) < t->bt_start); /* YYY */ 1366 bt_remfree(vm, t); 1367 bt_remseg(vm, t); 1368 bt->bt_size += t->bt_size; 1369 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1370 } 1371 t = CIRCLEQ_PREV(bt, bt_seglist); 1372 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1373 KASSERT(BT_END(t) < bt->bt_start); /* YYY */ 1374 bt_remfree(vm, t); 1375 bt_remseg(vm, t); 1376 bt->bt_size += t->bt_size; 1377 bt->bt_start = t->bt_start; 1378 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1379 } 1380 1381 t = CIRCLEQ_PREV(bt, bt_seglist); 1382 KASSERT(t != NULL); 1383 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY); 1384 if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN && 1385 t->bt_size == bt->bt_size) { 1386 vmem_addr_t spanaddr; 1387 vmem_size_t spansize; 1388 1389 KASSERT(t->bt_start == bt->bt_start); 1390 spanaddr = bt->bt_start; 1391 spansize = bt->bt_size; 1392 bt_remseg(vm, bt); 1393 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 1394 bt_remseg(vm, t); 1395 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1396 vm->vm_size -= spansize; 1397 VMEM_CONDVAR_BROADCAST(vm); 1398 VMEM_UNLOCK(vm); 1399 (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize); 1400 } else { 1401 bt_insfree(vm, bt); 1402 VMEM_CONDVAR_BROADCAST(vm); 1403 VMEM_UNLOCK(vm); 1404 } 1405 1406 while (!LIST_EMPTY(&tofree)) { 1407 t = LIST_FIRST(&tofree); 1408 LIST_REMOVE(t, bt_freelist); 1409 bt_free(vm, t); 1410 } 1411 } 1412 1413 /* 1414 * vmem_add: 1415 * 1416 * => caller must ensure appropriate spl, 1417 * if the arena can be accessed from interrupt context. 
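 *
 * => the added range becomes a static span: it is never returned to a
 *    release function, even when it becomes entirely free again.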
1418 */ 1419 1420 int 1421 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags) 1422 { 1423 1424 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC); 1425 } 1426 1427 /* 1428 * vmem_size: information about arenas size 1429 * 1430 * => return free/allocated size in arena 1431 */ 1432 vmem_size_t 1433 vmem_size(vmem_t *vm, int typemask) 1434 { 1435 1436 switch (typemask) { 1437 case VMEM_ALLOC: 1438 return vm->vm_inuse; 1439 case VMEM_FREE: 1440 return vm->vm_size - vm->vm_inuse; 1441 case VMEM_FREE|VMEM_ALLOC: 1442 return vm->vm_size; 1443 default: 1444 panic("vmem_size"); 1445 } 1446 } 1447 1448 /* ---- rehash */ 1449 1450 #if defined(_KERNEL) 1451 static struct callout vmem_rehash_ch; 1452 static int vmem_rehash_interval; 1453 static struct workqueue *vmem_rehash_wq; 1454 static struct work vmem_rehash_wk; 1455 1456 static void 1457 vmem_rehash_all(struct work *wk, void *dummy) 1458 { 1459 vmem_t *vm; 1460 1461 KASSERT(wk == &vmem_rehash_wk); 1462 mutex_enter(&vmem_list_lock); 1463 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1464 size_t desired; 1465 size_t current; 1466 1467 if (!VMEM_TRYLOCK(vm)) { 1468 continue; 1469 } 1470 desired = vm->vm_nbusytag; 1471 current = vm->vm_hashsize; 1472 VMEM_UNLOCK(vm); 1473 1474 if (desired > VMEM_HASHSIZE_MAX) { 1475 desired = VMEM_HASHSIZE_MAX; 1476 } else if (desired < VMEM_HASHSIZE_MIN) { 1477 desired = VMEM_HASHSIZE_MIN; 1478 } 1479 if (desired > current * 2 || desired * 2 < current) { 1480 vmem_rehash(vm, desired, VM_NOSLEEP); 1481 } 1482 } 1483 mutex_exit(&vmem_list_lock); 1484 1485 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1486 } 1487 1488 static void 1489 vmem_rehash_all_kick(void *dummy) 1490 { 1491 1492 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL); 1493 } 1494 1495 void 1496 vmem_rehash_start(void) 1497 { 1498 int error; 1499 1500 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash", 1501 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE); 1502 if (error) { 1503 panic("%s: workqueue_create %d\n", __func__, error); 1504 } 1505 callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE); 1506 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL); 1507 1508 vmem_rehash_interval = hz * 10; 1509 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1510 } 1511 #endif /* defined(_KERNEL) */ 1512 1513 /* ---- debug */ 1514 1515 #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) 1516 1517 static void bt_dump(const bt_t *, void (*)(const char *, ...)); 1518 1519 static const char * 1520 bt_type_string(int type) 1521 { 1522 static const char * const table[] = { 1523 [BT_TYPE_BUSY] = "busy", 1524 [BT_TYPE_FREE] = "free", 1525 [BT_TYPE_SPAN] = "span", 1526 [BT_TYPE_SPAN_STATIC] = "static span", 1527 }; 1528 1529 if (type >= __arraycount(table)) { 1530 return "BOGUS"; 1531 } 1532 return table[type]; 1533 } 1534 1535 static void 1536 bt_dump(const bt_t *bt, void (*pr)(const char *, ...)) 1537 { 1538 1539 (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n", 1540 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size, 1541 bt->bt_type, bt_type_string(bt->bt_type)); 1542 } 1543 1544 static void 1545 vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...)) 1546 { 1547 const bt_t *bt; 1548 int i; 1549 1550 (*pr)("vmem %p '%s'\n", vm, vm->vm_name); 1551 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1552 bt_dump(bt, pr); 1553 } 1554 1555 for (i = 0; i < VMEM_MAXORDER; i++) { 1556 const struct vmem_freelist *fl = &vm->vm_freelist[i]; 1557 1558 if (LIST_EMPTY(fl)) { 1559 continue; 1560 } 1561 
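		/* the order-i freelist is not empty: dump each tag on it */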
1562 (*pr)("freelist[%d]\n", i); 1563 LIST_FOREACH(bt, fl, bt_freelist) { 1564 bt_dump(bt, pr); 1565 } 1566 } 1567 } 1568 1569 #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */ 1570 1571 #if defined(DDB) 1572 static bt_t * 1573 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr) 1574 { 1575 bt_t *bt; 1576 1577 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1578 if (BT_ISSPAN_P(bt)) { 1579 continue; 1580 } 1581 if (bt->bt_start <= addr && addr <= BT_END(bt)) { 1582 return bt; 1583 } 1584 } 1585 1586 return NULL; 1587 } 1588 1589 void 1590 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 1591 { 1592 vmem_t *vm; 1593 1594 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1595 bt_t *bt; 1596 1597 bt = vmem_whatis_lookup(vm, addr); 1598 if (bt == NULL) { 1599 continue; 1600 } 1601 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n", 1602 (void *)addr, (void *)bt->bt_start, 1603 (size_t)(addr - bt->bt_start), vm->vm_name, 1604 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free"); 1605 } 1606 } 1607 1608 void 1609 vmem_printall(const char *modif, void (*pr)(const char *, ...)) 1610 { 1611 const vmem_t *vm; 1612 1613 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1614 vmem_dump(vm, pr); 1615 } 1616 } 1617 1618 void 1619 vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...)) 1620 { 1621 const vmem_t *vm = (const void *)addr; 1622 1623 vmem_dump(vm, pr); 1624 } 1625 #endif /* defined(DDB) */ 1626 1627 #if defined(_KERNEL) 1628 #define vmem_printf printf 1629 #else 1630 #include <stdio.h> 1631 #include <stdarg.h> 1632 1633 static void 1634 vmem_printf(const char *fmt, ...) 1635 { 1636 va_list ap; 1637 va_start(ap, fmt); 1638 vprintf(fmt, ap); 1639 va_end(ap); 1640 } 1641 #endif 1642 1643 #if defined(VMEM_SANITY) 1644 1645 static bool 1646 vmem_check_sanity(vmem_t *vm) 1647 { 1648 const bt_t *bt, *bt2; 1649 1650 KASSERT(vm != NULL); 1651 1652 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1653 if (bt->bt_start > BT_END(bt)) { 1654 printf("corrupted tag\n"); 1655 bt_dump(bt, vmem_printf); 1656 return false; 1657 } 1658 } 1659 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1660 CIRCLEQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1661 if (bt == bt2) { 1662 continue; 1663 } 1664 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1665 continue; 1666 } 1667 if (bt->bt_start <= BT_END(bt2) && 1668 bt2->bt_start <= BT_END(bt)) { 1669 printf("overwrapped tags\n"); 1670 bt_dump(bt, vmem_printf); 1671 bt_dump(bt2, vmem_printf); 1672 return false; 1673 } 1674 } 1675 } 1676 1677 return true; 1678 } 1679 1680 static void 1681 vmem_check(vmem_t *vm) 1682 { 1683 1684 if (!vmem_check_sanity(vm)) { 1685 panic("insanity vmem %p", vm); 1686 } 1687 } 1688 1689 #endif /* defined(VMEM_SANITY) */ 1690 1691 #if defined(UNITTEST) 1692 int 1693 main(void) 1694 { 1695 int rc; 1696 vmem_t *vm; 1697 vmem_addr_t p; 1698 struct reg { 1699 vmem_addr_t p; 1700 vmem_size_t sz; 1701 bool x; 1702 } *reg = NULL; 1703 int nreg = 0; 1704 int nalloc = 0; 1705 int nfree = 0; 1706 vmem_size_t total = 0; 1707 #if 1 1708 vm_flag_t strat = VM_INSTANTFIT; 1709 #else 1710 vm_flag_t strat = VM_BESTFIT; 1711 #endif 1712 1713 vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP, 1714 #ifdef _KERNEL 1715 IPL_NONE 1716 #else 1717 0 1718 #endif 1719 ); 1720 if (vm == NULL) { 1721 printf("vmem_create\n"); 1722 exit(EXIT_FAILURE); 1723 } 1724 vmem_dump(vm, vmem_printf); 1725 1726 rc = vmem_add(vm, 0, 50, VM_SLEEP); 1727 assert(rc == 0); 1728 rc = vmem_add(vm, 100, 200, VM_SLEEP); 1729 assert(rc == 0); 1730 rc = 
vmem_add(vm, 2000, 1, VM_SLEEP); 1731 assert(rc == 0); 1732 rc = vmem_add(vm, 40000, 65536, VM_SLEEP); 1733 assert(rc == 0); 1734 rc = vmem_add(vm, 10000, 10000, VM_SLEEP); 1735 assert(rc == 0); 1736 rc = vmem_add(vm, 500, 1000, VM_SLEEP); 1737 assert(rc == 0); 1738 rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP); 1739 assert(rc == 0); 1740 rc = vmem_xalloc(vm, 0x101, 0, 0, 0, 1741 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1742 assert(rc != 0); 1743 rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p); 1744 assert(rc == 0 && p == 0); 1745 vmem_xfree(vm, p, 50); 1746 rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p); 1747 assert(rc == 0 && p == 0); 1748 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1749 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p); 1750 assert(rc != 0); 1751 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1752 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p); 1753 assert(rc != 0); 1754 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1755 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1756 assert(rc == 0); 1757 vmem_dump(vm, vmem_printf); 1758 for (;;) { 1759 struct reg *r; 1760 int t = rand() % 100; 1761 1762 if (t > 45) { 1763 /* alloc */ 1764 vmem_size_t sz = rand() % 500 + 1; 1765 bool x; 1766 vmem_size_t align, phase, nocross; 1767 vmem_addr_t minaddr, maxaddr; 1768 1769 if (t > 70) { 1770 x = true; 1771 /* XXX */ 1772 align = 1 << (rand() % 15); 1773 phase = rand() % 65536; 1774 nocross = 1 << (rand() % 15); 1775 if (align <= phase) { 1776 phase = 0; 1777 } 1778 if (VMEM_CROSS_P(phase, phase + sz - 1, 1779 nocross)) { 1780 nocross = 0; 1781 } 1782 do { 1783 minaddr = rand() % 50000; 1784 maxaddr = rand() % 70000; 1785 } while (minaddr > maxaddr); 1786 printf("=== xalloc %" PRIu64 1787 " align=%" PRIu64 ", phase=%" PRIu64 1788 ", nocross=%" PRIu64 ", min=%" PRIu64 1789 ", max=%" PRIu64 "\n", 1790 (uint64_t)sz, 1791 (uint64_t)align, 1792 (uint64_t)phase, 1793 (uint64_t)nocross, 1794 (uint64_t)minaddr, 1795 (uint64_t)maxaddr); 1796 rc = vmem_xalloc(vm, sz, align, phase, nocross, 1797 minaddr, maxaddr, strat|VM_SLEEP, &p); 1798 } else { 1799 x = false; 1800 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz); 1801 rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p); 1802 } 1803 printf("-> %" PRIu64 "\n", (uint64_t)p); 1804 vmem_dump(vm, vmem_printf); 1805 if (rc != 0) { 1806 if (x) { 1807 continue; 1808 } 1809 break; 1810 } 1811 nreg++; 1812 reg = realloc(reg, sizeof(*reg) * nreg); 1813 r = &reg[nreg - 1]; 1814 r->p = p; 1815 r->sz = sz; 1816 r->x = x; 1817 total += sz; 1818 nalloc++; 1819 } else if (nreg != 0) { 1820 /* free */ 1821 r = &reg[rand() % nreg]; 1822 printf("=== free %" PRIu64 ", %" PRIu64 "\n", 1823 (uint64_t)r->p, (uint64_t)r->sz); 1824 if (r->x) { 1825 vmem_xfree(vm, r->p, r->sz); 1826 } else { 1827 vmem_free(vm, r->p, r->sz); 1828 } 1829 total -= r->sz; 1830 vmem_dump(vm, vmem_printf); 1831 *r = reg[nreg - 1]; 1832 nreg--; 1833 nfree++; 1834 } 1835 printf("total=%" PRIu64 "\n", (uint64_t)total); 1836 } 1837 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n", 1838 (uint64_t)total, nalloc, nfree); 1839 exit(EXIT_SUCCESS); 1840 } 1841 #endif /* defined(UNITTEST) */ 1842
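
/*
 * illustrative usage sketch, kept under "#if 0" so it is never compiled:
 * a hypothetical client creates an arena over an arbitrary numeric range,
 * performs a plain vmem_alloc/vmem_free pair, and then a constrained
 * vmem_xalloc.  the function name, arena name and all constants below are
 * made-up examples, not values used elsewhere in this file.
 */
#if 0
static void
vmem_usage_sketch(void)
{
	vmem_t *arena;
	vmem_addr_t addr;

	/* arena managing [0x1000, 0x11000), allocating in 16-unit quanta */
	arena = vmem_create("example", 0x1000, 0x10000, 16,
	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
	if (arena == NULL)
		return;

	/* plain allocation; the size is rounded up to the quantum */
	if (vmem_alloc(arena, 100, VM_SLEEP | VM_INSTANTFIT, &addr) == 0)
		vmem_free(arena, addr, 100);

	/*
	 * constrained allocation: 256 units, 64-aligned, not crossing
	 * a 4096-aligned boundary, anywhere in the arena.
	 */
	if (vmem_xalloc(arena, 256, 64, 0, 4096,
	    VMEM_ADDR_MIN, VMEM_ADDR_MAX, VM_SLEEP | VM_BESTFIT, &addr) == 0)
		vmem_xfree(arena, addr, 256);

	vmem_destroy(arena);
}
#endif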