1 /* $NetBSD: subr_vmem.c,v 1.72 2012/02/10 17:35:47 para Exp $ */ 2 3 /*- 4 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * reference: 31 * - Magazines and Vmem: Extending the Slab Allocator 32 * to Many CPUs and Arbitrary Resources 33 * http://www.usenix.org/event/usenix01/bonwick.html 34 */ 35 36 #include <sys/cdefs.h> 37 __KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.72 2012/02/10 17:35:47 para Exp $"); 38 39 #if defined(_KERNEL) 40 #include "opt_ddb.h" 41 #define QCACHE 42 #endif /* defined(_KERNEL) */ 43 44 #include <sys/param.h> 45 #include <sys/hash.h> 46 #include <sys/queue.h> 47 #include <sys/bitops.h> 48 49 #if defined(_KERNEL) 50 #include <sys/systm.h> 51 #include <sys/kernel.h> /* hz */ 52 #include <sys/callout.h> 53 #include <sys/kmem.h> 54 #include <sys/pool.h> 55 #include <sys/vmem.h> 56 #include <sys/workqueue.h> 57 #include <sys/atomic.h> 58 #include <uvm/uvm.h> 59 #include <uvm/uvm_extern.h> 60 #include <uvm/uvm_km.h> 61 #include <uvm/uvm_page.h> 62 #include <uvm/uvm_pdaemon.h> 63 #else /* defined(_KERNEL) */ 64 #include "../sys/vmem.h" 65 #endif /* defined(_KERNEL) */ 66 67 68 #if defined(_KERNEL) 69 #include <sys/evcnt.h> 70 #define VMEM_EVCNT_DEFINE(name) \ 71 struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \ 72 "vmemev", #name); \ 73 EVCNT_ATTACH_STATIC(vmem_evcnt_##name); 74 #define VMEM_EVCNT_INCR(ev) vmem_evcnt_##ev.ev_count++ 75 #define VMEM_EVCNT_DECR(ev) vmem_evcnt_##ev.ev_count-- 76 77 VMEM_EVCNT_DEFINE(bt_pages) 78 VMEM_EVCNT_DEFINE(bt_count) 79 VMEM_EVCNT_DEFINE(bt_inuse) 80 81 #define LOCK_DECL(name) \ 82 kmutex_t name; char lockpad[COHERENCY_UNIT - sizeof(kmutex_t)] 83 84 #define CONDVAR_DECL(name) \ 85 kcondvar_t name; 86 87 #else /* defined(_KERNEL) */ 88 #include <stdio.h> 89 #include <errno.h> 90 #include <assert.h> 91 #include <stdlib.h> 92 #include <string.h> 93 94 #define VMEM_EVCNT_INCR(ev) /* nothing */ 95 #define VMEM_EVCNT_DECR(ev) /* nothing */ 96 97 #define UNITTEST 98 #define KASSERT(a) assert(a) 99 #define LOCK_DECL(name) /* nothing */ 100 #define CONDVAR_DECL(name) /* nothing */ 101 #define VMEM_CONDVAR_INIT(vm, wchan) /* nothing */ 102 #define VMEM_CONDVAR_BROADCAST(vm) /* nothing */ 103 #define mutex_init(a, b, c) /* nothing */ 104 #define mutex_destroy(a) /* nothing */ 105 #define mutex_enter(a) /* nothing */ 106 #define mutex_tryenter(a) true 107 #define mutex_exit(a) /* nothing */ 108 #define mutex_owned(a) /* nothing */ 109 #define ASSERT_SLEEPABLE() /* nothing */ 110 #define panic(...) printf(__VA_ARGS__); abort() 111 #endif /* defined(_KERNEL) */ 112 113 struct vmem; 114 struct vmem_btag; 115 116 #if defined(VMEM_SANITY) 117 static void vmem_check(vmem_t *); 118 #else /* defined(VMEM_SANITY) */ 119 #define vmem_check(vm) /* nothing */ 120 #endif /* defined(VMEM_SANITY) */ 121 122 #define VMEM_MAXORDER (sizeof(vmem_size_t) * CHAR_BIT) 123 124 #define VMEM_HASHSIZE_MIN 1 /* XXX */ 125 #define VMEM_HASHSIZE_MAX 65536 /* XXX */ 126 #define VMEM_HASHSIZE_INIT 1 127 128 #define VM_FITMASK (VM_BESTFIT | VM_INSTANTFIT) 129 130 CIRCLEQ_HEAD(vmem_seglist, vmem_btag); 131 LIST_HEAD(vmem_freelist, vmem_btag); 132 LIST_HEAD(vmem_hashlist, vmem_btag); 133 134 #if defined(QCACHE) 135 #define VMEM_QCACHE_IDX_MAX 32 136 137 #define QC_NAME_MAX 16 138 139 struct qcache { 140 pool_cache_t qc_cache; 141 vmem_t *qc_vmem; 142 char qc_name[QC_NAME_MAX]; 143 }; 144 typedef struct qcache qcache_t; 145 #define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache)) 146 #endif /* defined(QCACHE) */ 147 148 #define VMEM_NAME_MAX 16 149 150 /* vmem arena */ 151 struct vmem { 152 CONDVAR_DECL(vm_cv); 153 LOCK_DECL(vm_lock); 154 vm_flag_t vm_flags; 155 vmem_import_t *vm_importfn; 156 vmem_release_t *vm_releasefn; 157 size_t vm_nfreetags; 158 LIST_HEAD(, vmem_btag) vm_freetags; 159 void *vm_arg; 160 struct vmem_seglist vm_seglist; 161 struct vmem_freelist vm_freelist[VMEM_MAXORDER]; 162 size_t vm_hashsize; 163 size_t vm_nbusytag; 164 struct vmem_hashlist *vm_hashlist; 165 struct vmem_hashlist vm_hash0; 166 size_t vm_quantum_mask; 167 int vm_quantum_shift; 168 size_t vm_size; 169 size_t vm_inuse; 170 char vm_name[VMEM_NAME_MAX+1]; 171 LIST_ENTRY(vmem) vm_alllist; 172 173 #if defined(QCACHE) 174 /* quantum cache */ 175 size_t vm_qcache_max; 176 struct pool_allocator vm_qcache_allocator; 177 qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX]; 178 qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX]; 179 #endif /* defined(QCACHE) */ 180 }; 181 182 #define VMEM_LOCK(vm) mutex_enter(&vm->vm_lock) 183 #define VMEM_TRYLOCK(vm) mutex_tryenter(&vm->vm_lock) 184 #define VMEM_UNLOCK(vm) mutex_exit(&vm->vm_lock) 185 #define VMEM_LOCK_INIT(vm, ipl) mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl) 186 #define VMEM_LOCK_DESTROY(vm) mutex_destroy(&vm->vm_lock) 187 #define VMEM_ASSERT_LOCKED(vm) KASSERT(mutex_owned(&vm->vm_lock)) 188 189 #if defined(_KERNEL) 190 #define VMEM_CONDVAR_INIT(vm, wchan) cv_init(&vm->vm_cv, wchan) 191 #define VMEM_CONDVAR_DESTROY(vm) cv_destroy(&vm->vm_cv) 192 #define VMEM_CONDVAR_WAIT(vm) cv_wait(&vm->vm_cv, &vm->vm_lock) 193 #define VMEM_CONDVAR_BROADCAST(vm) cv_broadcast(&vm->vm_cv) 194 #endif /* defined(_KERNEL) */ 195 196 /* boundary tag */ 197 struct vmem_btag { 198 CIRCLEQ_ENTRY(vmem_btag) bt_seglist; 199 union { 200 LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */ 201 LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */ 202 } bt_u; 203 #define bt_hashlist bt_u.u_hashlist 204 #define bt_freelist bt_u.u_freelist 205 vmem_addr_t bt_start; 206 vmem_size_t bt_size; 207 int bt_type; 208 }; 209 210 #define BT_TYPE_SPAN 1 211 #define BT_TYPE_SPAN_STATIC 2 212 #define BT_TYPE_FREE 3 213 #define BT_TYPE_BUSY 4 214 #define BT_ISSPAN_P(bt) ((bt)->bt_type <= BT_TYPE_SPAN_STATIC) 215 216 #define BT_END(bt) ((bt)->bt_start + (bt)->bt_size - 1) 217 218 typedef struct vmem_btag bt_t; 219 220 #if defined(_KERNEL) 221 static kmutex_t vmem_list_lock; 222 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list); 223 #endif /* defined(_KERNEL) */ 224 225 /* ---- misc */ 226 227 #define VMEM_ALIGNUP(addr, align) \ 228 (-(-(addr) & -(align))) 229 230 #define VMEM_CROSS_P(addr1, addr2, boundary) \ 231 ((((addr1) ^ (addr2)) & -(boundary)) != 0) 232 233 #define ORDER2SIZE(order) ((vmem_size_t)1 << (order)) 234 #define SIZE2ORDER(size) ((int)ilog2(size)) 235 236 #if !defined(_KERNEL) 237 #define xmalloc(sz, flags) malloc(sz) 238 #define xfree(p, sz) free(p) 239 #define bt_alloc(vm, flags) malloc(sizeof(bt_t)) 240 #define bt_free(vm, bt) free(bt) 241 #else /* defined(_KERNEL) */ 242 243 #define xmalloc(sz, flags) \ 244 kmem_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP); 245 #define xfree(p, sz) kmem_free(p, sz); 246 247 #define BT_MINRESERVE 6 248 #define BT_MAXFREE 64 249 #define STATIC_VMEM_COUNT 5 250 #define STATIC_BT_COUNT 200 251 #define STATIC_QC_POOL_COUNT (VMEM_QCACHE_IDX_MAX + 1) 252 253 static struct vmem static_vmems[STATIC_VMEM_COUNT]; 254 static int static_vmem_count = STATIC_VMEM_COUNT; 255 256 static struct vmem_btag static_bts[STATIC_BT_COUNT]; 257 static int static_bt_count = STATIC_BT_COUNT; 258 259 static struct pool_cache static_qc_pools[STATIC_QC_POOL_COUNT]; 260 static int static_qc_pool_count = STATIC_QC_POOL_COUNT; 261 262 vmem_t *kmem_va_meta_arena; 263 vmem_t *kmem_meta_arena; 264 265 static kmutex_t vmem_btag_lock; 266 static LIST_HEAD(, vmem_btag) vmem_btag_freelist; 267 static size_t vmem_btag_freelist_count = 0; 268 static size_t vmem_btag_count = STATIC_BT_COUNT; 269 270 /* ---- boundary tag */ 271 272 #define BT_PER_PAGE (PAGE_SIZE / sizeof(bt_t)) 273 274 static int bt_refill(vmem_t *vm, vm_flag_t flags); 275 276 static int 277 bt_refillglobal(vm_flag_t flags) 278 { 279 vmem_addr_t va; 280 bt_t *btp; 281 bt_t *bt; 282 int i; 283 284 mutex_enter(&vmem_btag_lock); 285 if (vmem_btag_freelist_count > (BT_MINRESERVE * 16)) { 286 mutex_exit(&vmem_btag_lock); 287 return 0; 288 } 289 290 if (vmem_alloc(kmem_meta_arena, PAGE_SIZE, 291 (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va) != 0) { 292 mutex_exit(&vmem_btag_lock); 293 return ENOMEM; 294 } 295 VMEM_EVCNT_INCR(bt_pages); 296 297 btp = (void *) va; 298 for (i = 0; i < (BT_PER_PAGE); i++) { 299 bt = btp; 300 memset(bt, 0, sizeof(*bt)); 301 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, 302 bt_freelist); 303 vmem_btag_freelist_count++; 304 vmem_btag_count++; 305 VMEM_EVCNT_INCR(bt_count); 306 btp++; 307 } 308 mutex_exit(&vmem_btag_lock); 309 310 bt_refill(kmem_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT); 311 bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT); 312 bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT); 313 314 return 0; 315 } 316 317 static int 318 bt_refill(vmem_t *vm, vm_flag_t flags) 319 { 320 bt_t *bt; 321 322 bt_refillglobal(flags); 323 324 VMEM_LOCK(vm); 325 mutex_enter(&vmem_btag_lock); 326 while (!LIST_EMPTY(&vmem_btag_freelist) && 327 vm->vm_nfreetags < (BT_MINRESERVE * 2)) { 328 bt = LIST_FIRST(&vmem_btag_freelist); 329 LIST_REMOVE(bt, bt_freelist); 330 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 331 vm->vm_nfreetags++; 332 vmem_btag_freelist_count--; 333 } 334 mutex_exit(&vmem_btag_lock); 335 336 if (vm->vm_nfreetags == 0) { 337 VMEM_UNLOCK(vm); 338 return ENOMEM; 339 } 340 VMEM_UNLOCK(vm); 341 342 return 0; 343 } 344 345 static inline bt_t * 346 bt_alloc(vmem_t *vm, vm_flag_t flags) 347 { 348 bt_t *bt; 349 again: 350 VMEM_LOCK(vm); 351 if (vm->vm_nfreetags < BT_MINRESERVE && 352 (flags & VM_POPULATING) == 0) { 353 VMEM_UNLOCK(vm); 354 if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) { 355 return NULL; 356 } 357 goto again; 358 } 359 bt = LIST_FIRST(&vm->vm_freetags); 360 LIST_REMOVE(bt, bt_freelist); 361 vm->vm_nfreetags--; 362 VMEM_UNLOCK(vm); 363 VMEM_EVCNT_INCR(bt_inuse); 364 365 return bt; 366 } 367 368 static inline void 369 bt_free(vmem_t *vm, bt_t *bt) 370 { 371 372 VMEM_LOCK(vm); 373 LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist); 374 vm->vm_nfreetags++; 375 while (vm->vm_nfreetags > BT_MAXFREE) { 376 bt = LIST_FIRST(&vm->vm_freetags); 377 LIST_REMOVE(bt, bt_freelist); 378 vm->vm_nfreetags--; 379 mutex_enter(&vmem_btag_lock); 380 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 381 vmem_btag_freelist_count++; 382 mutex_exit(&vmem_btag_lock); 383 } 384 VMEM_UNLOCK(vm); 385 VMEM_EVCNT_DECR(bt_inuse); 386 } 387 388 #endif /* defined(_KERNEL) */ 389 390 /* 391 * freelist[0] ... [1, 1] 392 * freelist[1] ... [2, 3] 393 * freelist[2] ... [4, 7] 394 * freelist[3] ... [8, 15] 395 * : 396 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1] 397 * : 398 */ 399 400 static struct vmem_freelist * 401 bt_freehead_tofree(vmem_t *vm, vmem_size_t size) 402 { 403 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 404 const int idx = SIZE2ORDER(qsize); 405 406 KASSERT(size != 0 && qsize != 0); 407 KASSERT((size & vm->vm_quantum_mask) == 0); 408 KASSERT(idx >= 0); 409 KASSERT(idx < VMEM_MAXORDER); 410 411 return &vm->vm_freelist[idx]; 412 } 413 414 /* 415 * bt_freehead_toalloc: return the freelist for the given size and allocation 416 * strategy. 417 * 418 * for VM_INSTANTFIT, return the list in which any blocks are large enough 419 * for the requested size. otherwise, return the list which can have blocks 420 * large enough for the requested size. 421 */ 422 423 static struct vmem_freelist * 424 bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat) 425 { 426 const vmem_size_t qsize = size >> vm->vm_quantum_shift; 427 int idx = SIZE2ORDER(qsize); 428 429 KASSERT(size != 0 && qsize != 0); 430 KASSERT((size & vm->vm_quantum_mask) == 0); 431 432 if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) { 433 idx++; 434 /* check too large request? */ 435 } 436 KASSERT(idx >= 0); 437 KASSERT(idx < VMEM_MAXORDER); 438 439 return &vm->vm_freelist[idx]; 440 } 441 442 /* ---- boundary tag hash */ 443 444 static struct vmem_hashlist * 445 bt_hashhead(vmem_t *vm, vmem_addr_t addr) 446 { 447 struct vmem_hashlist *list; 448 unsigned int hash; 449 450 hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT); 451 list = &vm->vm_hashlist[hash % vm->vm_hashsize]; 452 453 return list; 454 } 455 456 static bt_t * 457 bt_lookupbusy(vmem_t *vm, vmem_addr_t addr) 458 { 459 struct vmem_hashlist *list; 460 bt_t *bt; 461 462 list = bt_hashhead(vm, addr); 463 LIST_FOREACH(bt, list, bt_hashlist) { 464 if (bt->bt_start == addr) { 465 break; 466 } 467 } 468 469 return bt; 470 } 471 472 static void 473 bt_rembusy(vmem_t *vm, bt_t *bt) 474 { 475 476 KASSERT(vm->vm_nbusytag > 0); 477 vm->vm_nbusytag--; 478 LIST_REMOVE(bt, bt_hashlist); 479 } 480 481 static void 482 bt_insbusy(vmem_t *vm, bt_t *bt) 483 { 484 struct vmem_hashlist *list; 485 486 KASSERT(bt->bt_type == BT_TYPE_BUSY); 487 488 list = bt_hashhead(vm, bt->bt_start); 489 LIST_INSERT_HEAD(list, bt, bt_hashlist); 490 vm->vm_nbusytag++; 491 } 492 493 /* ---- boundary tag list */ 494 495 static void 496 bt_remseg(vmem_t *vm, bt_t *bt) 497 { 498 499 CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist); 500 } 501 502 static void 503 bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev) 504 { 505 506 CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist); 507 } 508 509 static void 510 bt_insseg_tail(vmem_t *vm, bt_t *bt) 511 { 512 513 CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist); 514 } 515 516 static void 517 bt_remfree(vmem_t *vm, bt_t *bt) 518 { 519 520 KASSERT(bt->bt_type == BT_TYPE_FREE); 521 522 LIST_REMOVE(bt, bt_freelist); 523 } 524 525 static void 526 bt_insfree(vmem_t *vm, bt_t *bt) 527 { 528 struct vmem_freelist *list; 529 530 list = bt_freehead_tofree(vm, bt->bt_size); 531 LIST_INSERT_HEAD(list, bt, bt_freelist); 532 } 533 534 /* ---- vmem internal functions */ 535 536 #if defined(QCACHE) 537 static inline vm_flag_t 538 prf_to_vmf(int prflags) 539 { 540 vm_flag_t vmflags; 541 542 KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0); 543 if ((prflags & PR_WAITOK) != 0) { 544 vmflags = VM_SLEEP; 545 } else { 546 vmflags = VM_NOSLEEP; 547 } 548 return vmflags; 549 } 550 551 static inline int 552 vmf_to_prf(vm_flag_t vmflags) 553 { 554 int prflags; 555 556 if ((vmflags & VM_SLEEP) != 0) { 557 prflags = PR_WAITOK; 558 } else { 559 prflags = PR_NOWAIT; 560 } 561 return prflags; 562 } 563 564 static size_t 565 qc_poolpage_size(size_t qcache_max) 566 { 567 int i; 568 569 for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) { 570 /* nothing */ 571 } 572 return ORDER2SIZE(i); 573 } 574 575 static void * 576 qc_poolpage_alloc(struct pool *pool, int prflags) 577 { 578 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 579 vmem_t *vm = qc->qc_vmem; 580 vmem_addr_t addr; 581 582 if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz, 583 prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0) 584 return NULL; 585 return (void *)addr; 586 } 587 588 static void 589 qc_poolpage_free(struct pool *pool, void *addr) 590 { 591 qcache_t *qc = QC_POOL_TO_QCACHE(pool); 592 vmem_t *vm = qc->qc_vmem; 593 594 vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz); 595 } 596 597 static void 598 qc_init(vmem_t *vm, size_t qcache_max, int ipl) 599 { 600 qcache_t *prevqc; 601 struct pool_allocator *pa; 602 int qcache_idx_max; 603 int i; 604 605 KASSERT((qcache_max & vm->vm_quantum_mask) == 0); 606 if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) { 607 qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift; 608 } 609 vm->vm_qcache_max = qcache_max; 610 pa = &vm->vm_qcache_allocator; 611 memset(pa, 0, sizeof(*pa)); 612 pa->pa_alloc = qc_poolpage_alloc; 613 pa->pa_free = qc_poolpage_free; 614 pa->pa_pagesz = qc_poolpage_size(qcache_max); 615 616 qcache_idx_max = qcache_max >> vm->vm_quantum_shift; 617 prevqc = NULL; 618 for (i = qcache_idx_max; i > 0; i--) { 619 qcache_t *qc = &vm->vm_qcache_store[i - 1]; 620 size_t size = i << vm->vm_quantum_shift; 621 pool_cache_t pc; 622 623 qc->qc_vmem = vm; 624 snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu", 625 vm->vm_name, size); 626 627 if (vm->vm_flags & VM_BOOTSTRAP) { 628 KASSERT(static_qc_pool_count > 0); 629 pc = &static_qc_pools[--static_qc_pool_count]; 630 pool_cache_bootstrap(pc, size, 631 ORDER2SIZE(vm->vm_quantum_shift), 0, 632 PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */, 633 qc->qc_name, pa, ipl, NULL, NULL, NULL); 634 } else { 635 pc = pool_cache_init(size, 636 ORDER2SIZE(vm->vm_quantum_shift), 0, 637 PR_NOALIGN | PR_NOTOUCH /* XXX */, 638 qc->qc_name, pa, ipl, NULL, NULL, NULL); 639 } 640 qc->qc_cache = pc; 641 KASSERT(qc->qc_cache != NULL); /* XXX */ 642 if (prevqc != NULL && 643 qc->qc_cache->pc_pool.pr_itemsperpage == 644 prevqc->qc_cache->pc_pool.pr_itemsperpage) { 645 if (vm->vm_flags & VM_BOOTSTRAP) { 646 pool_cache_bootstrap_destroy(pc); 647 //static_qc_pool_count++; 648 } else { 649 pool_cache_destroy(qc->qc_cache); 650 } 651 vm->vm_qcache[i - 1] = prevqc; 652 continue; 653 } 654 qc->qc_cache->pc_pool.pr_qcache = qc; 655 vm->vm_qcache[i - 1] = qc; 656 prevqc = qc; 657 } 658 } 659 660 static void 661 qc_destroy(vmem_t *vm) 662 { 663 const qcache_t *prevqc; 664 int i; 665 int qcache_idx_max; 666 667 qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift; 668 prevqc = NULL; 669 for (i = 0; i < qcache_idx_max; i++) { 670 qcache_t *qc = vm->vm_qcache[i]; 671 672 if (prevqc == qc) { 673 continue; 674 } 675 if (vm->vm_flags & VM_BOOTSTRAP) { 676 pool_cache_bootstrap_destroy(qc->qc_cache); 677 } else { 678 pool_cache_destroy(qc->qc_cache); 679 } 680 prevqc = qc; 681 } 682 } 683 #endif 684 685 #if defined(_KERNEL) 686 void 687 vmem_bootstrap(void) 688 { 689 690 mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM); 691 mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM); 692 693 while (static_bt_count-- > 0) { 694 bt_t *bt = &static_bts[static_bt_count]; 695 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 696 VMEM_EVCNT_INCR(bt_count); 697 vmem_btag_freelist_count++; 698 } 699 } 700 701 void 702 vmem_init(vmem_t *vm) 703 { 704 705 kmem_va_meta_arena = vmem_create("vmem-va", 0, 0, PAGE_SIZE, 706 vmem_alloc, vmem_free, vm, 707 0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT, 708 IPL_VM); 709 710 kmem_meta_arena = vmem_create("vmem-meta", 0, 0, PAGE_SIZE, 711 uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena, 712 0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM); 713 } 714 #endif /* defined(_KERNEL) */ 715 716 static int 717 vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags, 718 int spanbttype) 719 { 720 bt_t *btspan; 721 bt_t *btfree; 722 723 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 724 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 725 KASSERT(spanbttype == BT_TYPE_SPAN || 726 spanbttype == BT_TYPE_SPAN_STATIC); 727 728 btspan = bt_alloc(vm, flags); 729 if (btspan == NULL) { 730 return ENOMEM; 731 } 732 btfree = bt_alloc(vm, flags); 733 if (btfree == NULL) { 734 bt_free(vm, btspan); 735 return ENOMEM; 736 } 737 738 btspan->bt_type = spanbttype; 739 btspan->bt_start = addr; 740 btspan->bt_size = size; 741 742 btfree->bt_type = BT_TYPE_FREE; 743 btfree->bt_start = addr; 744 btfree->bt_size = size; 745 746 VMEM_LOCK(vm); 747 bt_insseg_tail(vm, btspan); 748 bt_insseg(vm, btfree, btspan); 749 bt_insfree(vm, btfree); 750 vm->vm_size += size; 751 VMEM_UNLOCK(vm); 752 753 return 0; 754 } 755 756 static void 757 vmem_destroy1(vmem_t *vm) 758 { 759 760 #if defined(QCACHE) 761 qc_destroy(vm); 762 #endif /* defined(QCACHE) */ 763 if (vm->vm_hashlist != NULL) { 764 int i; 765 766 for (i = 0; i < vm->vm_hashsize; i++) { 767 bt_t *bt; 768 769 while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) { 770 KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC); 771 bt_free(vm, bt); 772 } 773 } 774 if (vm->vm_hashlist != &vm->vm_hash0) { 775 xfree(vm->vm_hashlist, 776 sizeof(struct vmem_hashlist *) * vm->vm_hashsize); 777 } 778 } 779 780 while (vm->vm_nfreetags > 0) { 781 bt_t *bt = LIST_FIRST(&vm->vm_freetags); 782 LIST_REMOVE(bt, bt_freelist); 783 vm->vm_nfreetags--; 784 mutex_enter(&vmem_btag_lock); 785 #if defined (_KERNEL) 786 LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist); 787 vmem_btag_freelist_count++; 788 #endif /* defined(_KERNEL) */ 789 mutex_exit(&vmem_btag_lock); 790 } 791 792 VMEM_LOCK_DESTROY(vm); 793 xfree(vm, sizeof(*vm)); 794 } 795 796 static int 797 vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags) 798 { 799 vmem_addr_t addr; 800 int rc; 801 802 if (vm->vm_importfn == NULL) { 803 return EINVAL; 804 } 805 806 if (vm->vm_flags & VM_LARGEIMPORT) { 807 size *= 8; 808 } 809 810 if (vm->vm_flags & VM_XIMPORT) { 811 rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size, 812 &size, flags, &addr); 813 } else { 814 rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr); 815 } 816 if (rc) { 817 return ENOMEM; 818 } 819 820 if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) { 821 (*vm->vm_releasefn)(vm->vm_arg, addr, size); 822 return ENOMEM; 823 } 824 825 return 0; 826 } 827 828 static int 829 vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags) 830 { 831 bt_t *bt; 832 int i; 833 struct vmem_hashlist *newhashlist; 834 struct vmem_hashlist *oldhashlist; 835 size_t oldhashsize; 836 837 KASSERT(newhashsize > 0); 838 839 newhashlist = 840 xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags); 841 if (newhashlist == NULL) { 842 return ENOMEM; 843 } 844 for (i = 0; i < newhashsize; i++) { 845 LIST_INIT(&newhashlist[i]); 846 } 847 848 if (!VMEM_TRYLOCK(vm)) { 849 xfree(newhashlist, 850 sizeof(struct vmem_hashlist *) * newhashsize); 851 return EBUSY; 852 } 853 oldhashlist = vm->vm_hashlist; 854 oldhashsize = vm->vm_hashsize; 855 vm->vm_hashlist = newhashlist; 856 vm->vm_hashsize = newhashsize; 857 if (oldhashlist == NULL) { 858 VMEM_UNLOCK(vm); 859 return 0; 860 } 861 for (i = 0; i < oldhashsize; i++) { 862 while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) { 863 bt_rembusy(vm, bt); /* XXX */ 864 bt_insbusy(vm, bt); 865 } 866 } 867 VMEM_UNLOCK(vm); 868 869 if (oldhashlist != &vm->vm_hash0) { 870 xfree(oldhashlist, 871 sizeof(struct vmem_hashlist *) * oldhashsize); 872 } 873 874 return 0; 875 } 876 877 /* 878 * vmem_fit: check if a bt can satisfy the given restrictions. 879 * 880 * it's a caller's responsibility to ensure the region is big enough 881 * before calling us. 882 */ 883 884 static int 885 vmem_fit(const bt_t const *bt, vmem_size_t size, vmem_size_t align, 886 vmem_size_t phase, vmem_size_t nocross, 887 vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp) 888 { 889 vmem_addr_t start; 890 vmem_addr_t end; 891 892 KASSERT(size > 0); 893 KASSERT(bt->bt_size >= size); /* caller's responsibility */ 894 895 /* 896 * XXX assumption: vmem_addr_t and vmem_size_t are 897 * unsigned integer of the same size. 898 */ 899 900 start = bt->bt_start; 901 if (start < minaddr) { 902 start = minaddr; 903 } 904 end = BT_END(bt); 905 if (end > maxaddr) { 906 end = maxaddr; 907 } 908 if (start > end) { 909 return ENOMEM; 910 } 911 912 start = VMEM_ALIGNUP(start - phase, align) + phase; 913 if (start < bt->bt_start) { 914 start += align; 915 } 916 if (VMEM_CROSS_P(start, start + size - 1, nocross)) { 917 KASSERT(align < nocross); 918 start = VMEM_ALIGNUP(start - phase, nocross) + phase; 919 } 920 if (start <= end && end - start >= size - 1) { 921 KASSERT((start & (align - 1)) == phase); 922 KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross)); 923 KASSERT(minaddr <= start); 924 KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr); 925 KASSERT(bt->bt_start <= start); 926 KASSERT(BT_END(bt) - start >= size - 1); 927 *addrp = start; 928 return 0; 929 } 930 return ENOMEM; 931 } 932 933 934 /* 935 * vmem_create_internal: creates a vmem arena. 936 */ 937 938 static vmem_t * 939 vmem_create_internal(const char *name, vmem_addr_t base, vmem_size_t size, 940 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, 941 void *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 942 { 943 vmem_t *vm = NULL; 944 int i; 945 946 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 947 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 948 KASSERT(quantum > 0); 949 950 if (flags & VM_BOOTSTRAP) { 951 #if defined(_KERNEL) 952 KASSERT(static_vmem_count > 0); 953 vm = &static_vmems[--static_vmem_count]; 954 #endif /* defined(_KERNEL) */ 955 } else { 956 vm = xmalloc(sizeof(*vm), flags); 957 } 958 if (vm == NULL) { 959 return NULL; 960 } 961 962 VMEM_CONDVAR_INIT(vm, "vmem"); 963 VMEM_LOCK_INIT(vm, ipl); 964 vm->vm_flags = flags; 965 vm->vm_nfreetags = 0; 966 LIST_INIT(&vm->vm_freetags); 967 strlcpy(vm->vm_name, name, sizeof(vm->vm_name)); 968 vm->vm_quantum_mask = quantum - 1; 969 vm->vm_quantum_shift = SIZE2ORDER(quantum); 970 KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum); 971 vm->vm_importfn = importfn; 972 vm->vm_releasefn = releasefn; 973 vm->vm_arg = arg; 974 vm->vm_nbusytag = 0; 975 vm->vm_size = 0; 976 vm->vm_inuse = 0; 977 #if defined(QCACHE) 978 qc_init(vm, qcache_max, ipl); 979 #endif /* defined(QCACHE) */ 980 981 CIRCLEQ_INIT(&vm->vm_seglist); 982 for (i = 0; i < VMEM_MAXORDER; i++) { 983 LIST_INIT(&vm->vm_freelist[i]); 984 } 985 vm->vm_hashlist = NULL; 986 if (flags & VM_BOOTSTRAP) { 987 vm->vm_hashsize = 1; 988 vm->vm_hashlist = &vm->vm_hash0; 989 } else if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) { 990 vmem_destroy1(vm); 991 return NULL; 992 } 993 994 if (size != 0) { 995 if (vmem_add(vm, base, size, flags) != 0) { 996 vmem_destroy1(vm); 997 return NULL; 998 } 999 } 1000 1001 #if defined(_KERNEL) 1002 if (flags & VM_BOOTSTRAP) { 1003 bt_refill(vm, VM_NOSLEEP); 1004 } 1005 1006 mutex_enter(&vmem_list_lock); 1007 LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist); 1008 mutex_exit(&vmem_list_lock); 1009 #endif /* defined(_KERNEL) */ 1010 1011 return vm; 1012 } 1013 1014 1015 /* ---- vmem API */ 1016 1017 /* 1018 * vmem_create: create an arena. 1019 * 1020 * => must not be called from interrupt context. 1021 */ 1022 1023 vmem_t * 1024 vmem_create(const char *name, vmem_addr_t base, vmem_size_t size, 1025 vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn, 1026 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 1027 { 1028 1029 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1030 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1031 KASSERT((flags & (VM_XIMPORT)) == 0); 1032 1033 return vmem_create_internal(name, base, size, quantum, 1034 importfn, releasefn, source, qcache_max, flags, ipl); 1035 } 1036 1037 /* 1038 * vmem_xcreate: create an arena takes alternative import func. 1039 * 1040 * => must not be called from interrupt context. 1041 */ 1042 1043 vmem_t * 1044 vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size, 1045 vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn, 1046 vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl) 1047 { 1048 1049 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1050 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1051 KASSERT((flags & (VM_XIMPORT)) == 0); 1052 1053 return vmem_create_internal(name, base, size, quantum, 1054 (vmem_import_t *)importfn, releasefn, source, 1055 qcache_max, flags | VM_XIMPORT, ipl); 1056 } 1057 1058 void 1059 vmem_destroy(vmem_t *vm) 1060 { 1061 1062 #if defined(_KERNEL) 1063 mutex_enter(&vmem_list_lock); 1064 LIST_REMOVE(vm, vm_alllist); 1065 mutex_exit(&vmem_list_lock); 1066 #endif /* defined(_KERNEL) */ 1067 1068 vmem_destroy1(vm); 1069 } 1070 1071 vmem_size_t 1072 vmem_roundup_size(vmem_t *vm, vmem_size_t size) 1073 { 1074 1075 return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask; 1076 } 1077 1078 /* 1079 * vmem_alloc: 1080 * 1081 * => caller must ensure appropriate spl, 1082 * if the arena can be accessed from interrupt context. 1083 */ 1084 1085 int 1086 vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp) 1087 { 1088 const vm_flag_t strat __unused = flags & VM_FITMASK; 1089 1090 KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1091 KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0); 1092 1093 KASSERT(size > 0); 1094 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1095 if ((flags & VM_SLEEP) != 0) { 1096 ASSERT_SLEEPABLE(); 1097 } 1098 1099 #if defined(QCACHE) 1100 if (size <= vm->vm_qcache_max) { 1101 void *p; 1102 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1103 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1104 1105 p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags)); 1106 if (addrp != NULL) 1107 *addrp = (vmem_addr_t)p; 1108 return (p == NULL) ? ENOMEM : 0; 1109 } 1110 #endif /* defined(QCACHE) */ 1111 1112 return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, 1113 flags, addrp); 1114 } 1115 1116 int 1117 vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align, 1118 const vmem_size_t phase, const vmem_size_t nocross, 1119 const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags, 1120 vmem_addr_t *addrp) 1121 { 1122 struct vmem_freelist *list; 1123 struct vmem_freelist *first; 1124 struct vmem_freelist *end; 1125 bt_t *bt; 1126 bt_t *btnew; 1127 bt_t *btnew2; 1128 const vmem_size_t size = vmem_roundup_size(vm, size0); 1129 vm_flag_t strat = flags & VM_FITMASK; 1130 vmem_addr_t start; 1131 int rc; 1132 1133 KASSERT(size0 > 0); 1134 KASSERT(size > 0); 1135 KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT); 1136 if ((flags & VM_SLEEP) != 0) { 1137 ASSERT_SLEEPABLE(); 1138 } 1139 KASSERT((align & vm->vm_quantum_mask) == 0); 1140 KASSERT((align & (align - 1)) == 0); 1141 KASSERT((phase & vm->vm_quantum_mask) == 0); 1142 KASSERT((nocross & vm->vm_quantum_mask) == 0); 1143 KASSERT((nocross & (nocross - 1)) == 0); 1144 KASSERT((align == 0 && phase == 0) || phase < align); 1145 KASSERT(nocross == 0 || nocross >= size); 1146 KASSERT(minaddr <= maxaddr); 1147 KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross)); 1148 1149 if (align == 0) { 1150 align = vm->vm_quantum_mask + 1; 1151 } 1152 1153 /* 1154 * allocate boundary tags before acquiring the vmem lock. 1155 */ 1156 btnew = bt_alloc(vm, flags); 1157 if (btnew == NULL) { 1158 return ENOMEM; 1159 } 1160 btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */ 1161 if (btnew2 == NULL) { 1162 bt_free(vm, btnew); 1163 return ENOMEM; 1164 } 1165 1166 /* 1167 * choose a free block from which we allocate. 1168 */ 1169 retry_strat: 1170 first = bt_freehead_toalloc(vm, size, strat); 1171 end = &vm->vm_freelist[VMEM_MAXORDER]; 1172 retry: 1173 bt = NULL; 1174 VMEM_LOCK(vm); 1175 vmem_check(vm); 1176 if (strat == VM_INSTANTFIT) { 1177 /* 1178 * just choose the first block which satisfies our restrictions. 1179 * 1180 * note that we don't need to check the size of the blocks 1181 * because any blocks found on these list should be larger than 1182 * the given size. 1183 */ 1184 for (list = first; list < end; list++) { 1185 bt = LIST_FIRST(list); 1186 if (bt != NULL) { 1187 rc = vmem_fit(bt, size, align, phase, 1188 nocross, minaddr, maxaddr, &start); 1189 if (rc == 0) { 1190 goto gotit; 1191 } 1192 /* 1193 * don't bother to follow the bt_freelist link 1194 * here. the list can be very long and we are 1195 * told to run fast. blocks from the later free 1196 * lists are larger and have better chances to 1197 * satisfy our restrictions. 1198 */ 1199 } 1200 } 1201 } else { /* VM_BESTFIT */ 1202 /* 1203 * we assume that, for space efficiency, it's better to 1204 * allocate from a smaller block. thus we will start searching 1205 * from the lower-order list than VM_INSTANTFIT. 1206 * however, don't bother to find the smallest block in a free 1207 * list because the list can be very long. we can revisit it 1208 * if/when it turns out to be a problem. 1209 * 1210 * note that the 'first' list can contain blocks smaller than 1211 * the requested size. thus we need to check bt_size. 1212 */ 1213 for (list = first; list < end; list++) { 1214 LIST_FOREACH(bt, list, bt_freelist) { 1215 if (bt->bt_size >= size) { 1216 rc = vmem_fit(bt, size, align, phase, 1217 nocross, minaddr, maxaddr, &start); 1218 if (rc == 0) { 1219 goto gotit; 1220 } 1221 } 1222 } 1223 } 1224 } 1225 VMEM_UNLOCK(vm); 1226 #if 1 1227 if (strat == VM_INSTANTFIT) { 1228 strat = VM_BESTFIT; 1229 goto retry_strat; 1230 } 1231 #endif 1232 if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) { 1233 1234 /* 1235 * XXX should try to import a region large enough to 1236 * satisfy restrictions? 1237 */ 1238 1239 goto fail; 1240 } 1241 /* XXX eeek, minaddr & maxaddr not respected */ 1242 if (vmem_import(vm, size, flags) == 0) { 1243 goto retry; 1244 } 1245 /* XXX */ 1246 1247 if ((flags & VM_SLEEP) != 0) { 1248 #if defined(_KERNEL) && !defined(_RUMPKERNEL) 1249 mutex_spin_enter(&uvm_fpageqlock); 1250 uvm_kick_pdaemon(); 1251 mutex_spin_exit(&uvm_fpageqlock); 1252 #endif 1253 VMEM_LOCK(vm); 1254 VMEM_CONDVAR_WAIT(vm); 1255 VMEM_UNLOCK(vm); 1256 goto retry; 1257 } 1258 fail: 1259 bt_free(vm, btnew); 1260 bt_free(vm, btnew2); 1261 return ENOMEM; 1262 1263 gotit: 1264 KASSERT(bt->bt_type == BT_TYPE_FREE); 1265 KASSERT(bt->bt_size >= size); 1266 bt_remfree(vm, bt); 1267 vmem_check(vm); 1268 vm->vm_inuse += size; 1269 if (bt->bt_start != start) { 1270 btnew2->bt_type = BT_TYPE_FREE; 1271 btnew2->bt_start = bt->bt_start; 1272 btnew2->bt_size = start - bt->bt_start; 1273 bt->bt_start = start; 1274 bt->bt_size -= btnew2->bt_size; 1275 bt_insfree(vm, btnew2); 1276 bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist)); 1277 btnew2 = NULL; 1278 vmem_check(vm); 1279 } 1280 KASSERT(bt->bt_start == start); 1281 if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) { 1282 /* split */ 1283 btnew->bt_type = BT_TYPE_BUSY; 1284 btnew->bt_start = bt->bt_start; 1285 btnew->bt_size = size; 1286 bt->bt_start = bt->bt_start + size; 1287 bt->bt_size -= size; 1288 bt_insfree(vm, bt); 1289 bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist)); 1290 bt_insbusy(vm, btnew); 1291 vmem_check(vm); 1292 VMEM_UNLOCK(vm); 1293 } else { 1294 bt->bt_type = BT_TYPE_BUSY; 1295 bt_insbusy(vm, bt); 1296 vmem_check(vm); 1297 VMEM_UNLOCK(vm); 1298 bt_free(vm, btnew); 1299 btnew = bt; 1300 } 1301 if (btnew2 != NULL) { 1302 bt_free(vm, btnew2); 1303 } 1304 KASSERT(btnew->bt_size >= size); 1305 btnew->bt_type = BT_TYPE_BUSY; 1306 1307 if (addrp != NULL) 1308 *addrp = btnew->bt_start; 1309 return 0; 1310 } 1311 1312 /* 1313 * vmem_free: 1314 * 1315 * => caller must ensure appropriate spl, 1316 * if the arena can be accessed from interrupt context. 1317 */ 1318 1319 void 1320 vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1321 { 1322 1323 KASSERT(size > 0); 1324 1325 #if defined(QCACHE) 1326 if (size <= vm->vm_qcache_max) { 1327 int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift; 1328 qcache_t *qc = vm->vm_qcache[qidx - 1]; 1329 1330 pool_cache_put(qc->qc_cache, (void *)addr); 1331 return; 1332 } 1333 #endif /* defined(QCACHE) */ 1334 1335 vmem_xfree(vm, addr, size); 1336 } 1337 1338 void 1339 vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size) 1340 { 1341 bt_t *bt; 1342 bt_t *t; 1343 LIST_HEAD(, vmem_btag) tofree; 1344 1345 LIST_INIT(&tofree); 1346 1347 KASSERT(size > 0); 1348 1349 VMEM_LOCK(vm); 1350 1351 bt = bt_lookupbusy(vm, addr); 1352 KASSERT(bt != NULL); 1353 KASSERT(bt->bt_start == addr); 1354 KASSERT(bt->bt_size == vmem_roundup_size(vm, size) || 1355 bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask); 1356 KASSERT(bt->bt_type == BT_TYPE_BUSY); 1357 bt_rembusy(vm, bt); 1358 bt->bt_type = BT_TYPE_FREE; 1359 1360 vm->vm_inuse -= bt->bt_size; 1361 1362 /* coalesce */ 1363 t = CIRCLEQ_NEXT(bt, bt_seglist); 1364 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1365 KASSERT(BT_END(bt) < t->bt_start); /* YYY */ 1366 bt_remfree(vm, t); 1367 bt_remseg(vm, t); 1368 bt->bt_size += t->bt_size; 1369 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1370 } 1371 t = CIRCLEQ_PREV(bt, bt_seglist); 1372 if (t != NULL && t->bt_type == BT_TYPE_FREE) { 1373 KASSERT(BT_END(t) < bt->bt_start); /* YYY */ 1374 bt_remfree(vm, t); 1375 bt_remseg(vm, t); 1376 bt->bt_size += t->bt_size; 1377 bt->bt_start = t->bt_start; 1378 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1379 } 1380 1381 t = CIRCLEQ_PREV(bt, bt_seglist); 1382 KASSERT(t != NULL); 1383 KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY); 1384 if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN && 1385 t->bt_size == bt->bt_size) { 1386 vmem_addr_t spanaddr; 1387 vmem_size_t spansize; 1388 1389 KASSERT(t->bt_start == bt->bt_start); 1390 spanaddr = bt->bt_start; 1391 spansize = bt->bt_size; 1392 bt_remseg(vm, bt); 1393 LIST_INSERT_HEAD(&tofree, bt, bt_freelist); 1394 bt_remseg(vm, t); 1395 LIST_INSERT_HEAD(&tofree, t, bt_freelist); 1396 vm->vm_size -= spansize; 1397 VMEM_CONDVAR_BROADCAST(vm); 1398 VMEM_UNLOCK(vm); 1399 (*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize); 1400 } else { 1401 bt_insfree(vm, bt); 1402 VMEM_CONDVAR_BROADCAST(vm); 1403 VMEM_UNLOCK(vm); 1404 } 1405 1406 while (!LIST_EMPTY(&tofree)) { 1407 t = LIST_FIRST(&tofree); 1408 LIST_REMOVE(t, bt_freelist); 1409 bt_free(vm, t); 1410 } 1411 } 1412 1413 /* 1414 * vmem_add: 1415 * 1416 * => caller must ensure appropriate spl, 1417 * if the arena can be accessed from interrupt context. 1418 */ 1419 1420 int 1421 vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags) 1422 { 1423 1424 return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC); 1425 } 1426 1427 /* 1428 * vmem_size: information about arenas size 1429 * 1430 * => return free/allocated size in arena 1431 */ 1432 vmem_size_t 1433 vmem_size(vmem_t *vm, int typemask) 1434 { 1435 1436 switch (typemask) { 1437 case VMEM_ALLOC: 1438 return vm->vm_inuse; 1439 case VMEM_FREE: 1440 return vm->vm_size - vm->vm_inuse; 1441 case VMEM_FREE|VMEM_ALLOC: 1442 return vm->vm_size; 1443 default: 1444 panic("vmem_size"); 1445 } 1446 } 1447 1448 /* ---- rehash */ 1449 1450 #if defined(_KERNEL) 1451 static struct callout vmem_rehash_ch; 1452 static int vmem_rehash_interval; 1453 static struct workqueue *vmem_rehash_wq; 1454 static struct work vmem_rehash_wk; 1455 1456 static void 1457 vmem_rehash_all(struct work *wk, void *dummy) 1458 { 1459 vmem_t *vm; 1460 1461 KASSERT(wk == &vmem_rehash_wk); 1462 mutex_enter(&vmem_list_lock); 1463 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1464 size_t desired; 1465 size_t current; 1466 1467 if (!VMEM_TRYLOCK(vm)) { 1468 continue; 1469 } 1470 desired = vm->vm_nbusytag; 1471 current = vm->vm_hashsize; 1472 VMEM_UNLOCK(vm); 1473 1474 if (desired > VMEM_HASHSIZE_MAX) { 1475 desired = VMEM_HASHSIZE_MAX; 1476 } else if (desired < VMEM_HASHSIZE_MIN) { 1477 desired = VMEM_HASHSIZE_MIN; 1478 } 1479 if (desired > current * 2 || desired * 2 < current) { 1480 vmem_rehash(vm, desired, VM_NOSLEEP); 1481 } 1482 } 1483 mutex_exit(&vmem_list_lock); 1484 1485 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1486 } 1487 1488 static void 1489 vmem_rehash_all_kick(void *dummy) 1490 { 1491 1492 workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL); 1493 } 1494 1495 void 1496 vmem_rehash_start(void) 1497 { 1498 int error; 1499 1500 error = workqueue_create(&vmem_rehash_wq, "vmem_rehash", 1501 vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE); 1502 if (error) { 1503 panic("%s: workqueue_create %d\n", __func__, error); 1504 } 1505 callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE); 1506 callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL); 1507 1508 vmem_rehash_interval = hz * 10; 1509 callout_schedule(&vmem_rehash_ch, vmem_rehash_interval); 1510 } 1511 #endif /* defined(_KERNEL) */ 1512 1513 /* ---- debug */ 1514 1515 #if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) 1516 1517 static void bt_dump(const bt_t *, void (*)(const char *, ...)); 1518 1519 static const char * 1520 bt_type_string(int type) 1521 { 1522 static const char * const table[] = { 1523 [BT_TYPE_BUSY] = "busy", 1524 [BT_TYPE_FREE] = "free", 1525 [BT_TYPE_SPAN] = "span", 1526 [BT_TYPE_SPAN_STATIC] = "static span", 1527 }; 1528 1529 if (type >= __arraycount(table)) { 1530 return "BOGUS"; 1531 } 1532 return table[type]; 1533 } 1534 1535 static void 1536 bt_dump(const bt_t *bt, void (*pr)(const char *, ...)) 1537 { 1538 1539 (*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n", 1540 bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size, 1541 bt->bt_type, bt_type_string(bt->bt_type)); 1542 } 1543 1544 static void 1545 vmem_dump(const vmem_t *vm , void (*pr)(const char *, ...)) 1546 { 1547 const bt_t *bt; 1548 int i; 1549 1550 (*pr)("vmem %p '%s'\n", vm, vm->vm_name); 1551 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1552 bt_dump(bt, pr); 1553 } 1554 1555 for (i = 0; i < VMEM_MAXORDER; i++) { 1556 const struct vmem_freelist *fl = &vm->vm_freelist[i]; 1557 1558 if (LIST_EMPTY(fl)) { 1559 continue; 1560 } 1561 1562 (*pr)("freelist[%d]\n", i); 1563 LIST_FOREACH(bt, fl, bt_freelist) { 1564 bt_dump(bt, pr); 1565 } 1566 } 1567 } 1568 1569 #endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */ 1570 1571 #if defined(DDB) 1572 static bt_t * 1573 vmem_whatis_lookup(vmem_t *vm, uintptr_t addr) 1574 { 1575 bt_t *bt; 1576 1577 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1578 if (BT_ISSPAN_P(bt)) { 1579 continue; 1580 } 1581 if (bt->bt_start <= addr && addr <= BT_END(bt)) { 1582 return bt; 1583 } 1584 } 1585 1586 return NULL; 1587 } 1588 1589 void 1590 vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...)) 1591 { 1592 vmem_t *vm; 1593 1594 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1595 bt_t *bt; 1596 1597 bt = vmem_whatis_lookup(vm, addr); 1598 if (bt == NULL) { 1599 continue; 1600 } 1601 (*pr)("%p is %p+%zu in VMEM '%s' (%s)\n", 1602 (void *)addr, (void *)bt->bt_start, 1603 (size_t)(addr - bt->bt_start), vm->vm_name, 1604 (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free"); 1605 } 1606 } 1607 1608 void 1609 vmem_printall(const char *modif, void (*pr)(const char *, ...)) 1610 { 1611 const vmem_t *vm; 1612 1613 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1614 vmem_dump(vm, pr); 1615 } 1616 } 1617 1618 void 1619 vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...)) 1620 { 1621 const vmem_t *vm = (const void *)addr; 1622 1623 vmem_dump(vm, pr); 1624 } 1625 #endif /* defined(DDB) */ 1626 1627 #if defined(_KERNEL) 1628 #define vmem_printf printf 1629 #else 1630 #include <stdio.h> 1631 #include <stdarg.h> 1632 1633 static void 1634 vmem_printf(const char *fmt, ...) 1635 { 1636 va_list ap; 1637 va_start(ap, fmt); 1638 vprintf(fmt, ap); 1639 va_end(ap); 1640 } 1641 #endif 1642 1643 #if defined(VMEM_SANITY) 1644 1645 static bool 1646 vmem_check_sanity(vmem_t *vm) 1647 { 1648 const bt_t *bt, *bt2; 1649 1650 KASSERT(vm != NULL); 1651 1652 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1653 if (bt->bt_start > BT_END(bt)) { 1654 printf("corrupted tag\n"); 1655 bt_dump(bt, vmem_printf); 1656 return false; 1657 } 1658 } 1659 CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1660 CIRCLEQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1661 if (bt == bt2) { 1662 continue; 1663 } 1664 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1665 continue; 1666 } 1667 if (bt->bt_start <= BT_END(bt2) && 1668 bt2->bt_start <= BT_END(bt)) { 1669 printf("overwrapped tags\n"); 1670 bt_dump(bt, vmem_printf); 1671 bt_dump(bt2, vmem_printf); 1672 return false; 1673 } 1674 } 1675 } 1676 1677 return true; 1678 } 1679 1680 static void 1681 vmem_check(vmem_t *vm) 1682 { 1683 1684 if (!vmem_check_sanity(vm)) { 1685 panic("insanity vmem %p", vm); 1686 } 1687 } 1688 1689 #endif /* defined(VMEM_SANITY) */ 1690 1691 #if defined(UNITTEST) 1692 int 1693 main(void) 1694 { 1695 int rc; 1696 vmem_t *vm; 1697 vmem_addr_t p; 1698 struct reg { 1699 vmem_addr_t p; 1700 vmem_size_t sz; 1701 bool x; 1702 } *reg = NULL; 1703 int nreg = 0; 1704 int nalloc = 0; 1705 int nfree = 0; 1706 vmem_size_t total = 0; 1707 #if 1 1708 vm_flag_t strat = VM_INSTANTFIT; 1709 #else 1710 vm_flag_t strat = VM_BESTFIT; 1711 #endif 1712 1713 vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP, 1714 #ifdef _KERNEL 1715 IPL_NONE 1716 #else 1717 0 1718 #endif 1719 ); 1720 if (vm == NULL) { 1721 printf("vmem_create\n"); 1722 exit(EXIT_FAILURE); 1723 } 1724 vmem_dump(vm, vmem_printf); 1725 1726 rc = vmem_add(vm, 0, 50, VM_SLEEP); 1727 assert(rc == 0); 1728 rc = vmem_add(vm, 100, 200, VM_SLEEP); 1729 assert(rc == 0); 1730 rc = vmem_add(vm, 2000, 1, VM_SLEEP); 1731 assert(rc == 0); 1732 rc = vmem_add(vm, 40000, 65536, VM_SLEEP); 1733 assert(rc == 0); 1734 rc = vmem_add(vm, 10000, 10000, VM_SLEEP); 1735 assert(rc == 0); 1736 rc = vmem_add(vm, 500, 1000, VM_SLEEP); 1737 assert(rc == 0); 1738 rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP); 1739 assert(rc == 0); 1740 rc = vmem_xalloc(vm, 0x101, 0, 0, 0, 1741 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1742 assert(rc != 0); 1743 rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p); 1744 assert(rc == 0 && p == 0); 1745 vmem_xfree(vm, p, 50); 1746 rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p); 1747 assert(rc == 0 && p == 0); 1748 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1749 0xffffff01, 0xffffffff, strat|VM_SLEEP, &p); 1750 assert(rc != 0); 1751 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1752 0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p); 1753 assert(rc != 0); 1754 rc = vmem_xalloc(vm, 0x100, 0, 0, 0, 1755 0xffffff00, 0xffffffff, strat|VM_SLEEP, &p); 1756 assert(rc == 0); 1757 vmem_dump(vm, vmem_printf); 1758 for (;;) { 1759 struct reg *r; 1760 int t = rand() % 100; 1761 1762 if (t > 45) { 1763 /* alloc */ 1764 vmem_size_t sz = rand() % 500 + 1; 1765 bool x; 1766 vmem_size_t align, phase, nocross; 1767 vmem_addr_t minaddr, maxaddr; 1768 1769 if (t > 70) { 1770 x = true; 1771 /* XXX */ 1772 align = 1 << (rand() % 15); 1773 phase = rand() % 65536; 1774 nocross = 1 << (rand() % 15); 1775 if (align <= phase) { 1776 phase = 0; 1777 } 1778 if (VMEM_CROSS_P(phase, phase + sz - 1, 1779 nocross)) { 1780 nocross = 0; 1781 } 1782 do { 1783 minaddr = rand() % 50000; 1784 maxaddr = rand() % 70000; 1785 } while (minaddr > maxaddr); 1786 printf("=== xalloc %" PRIu64 1787 " align=%" PRIu64 ", phase=%" PRIu64 1788 ", nocross=%" PRIu64 ", min=%" PRIu64 1789 ", max=%" PRIu64 "\n", 1790 (uint64_t)sz, 1791 (uint64_t)align, 1792 (uint64_t)phase, 1793 (uint64_t)nocross, 1794 (uint64_t)minaddr, 1795 (uint64_t)maxaddr); 1796 rc = vmem_xalloc(vm, sz, align, phase, nocross, 1797 minaddr, maxaddr, strat|VM_SLEEP, &p); 1798 } else { 1799 x = false; 1800 printf("=== alloc %" PRIu64 "\n", (uint64_t)sz); 1801 rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p); 1802 } 1803 printf("-> %" PRIu64 "\n", (uint64_t)p); 1804 vmem_dump(vm, vmem_printf); 1805 if (rc != 0) { 1806 if (x) { 1807 continue; 1808 } 1809 break; 1810 } 1811 nreg++; 1812 reg = realloc(reg, sizeof(*reg) * nreg); 1813 r = ®[nreg - 1]; 1814 r->p = p; 1815 r->sz = sz; 1816 r->x = x; 1817 total += sz; 1818 nalloc++; 1819 } else if (nreg != 0) { 1820 /* free */ 1821 r = ®[rand() % nreg]; 1822 printf("=== free %" PRIu64 ", %" PRIu64 "\n", 1823 (uint64_t)r->p, (uint64_t)r->sz); 1824 if (r->x) { 1825 vmem_xfree(vm, r->p, r->sz); 1826 } else { 1827 vmem_free(vm, r->p, r->sz); 1828 } 1829 total -= r->sz; 1830 vmem_dump(vm, vmem_printf); 1831 *r = reg[nreg - 1]; 1832 nreg--; 1833 nfree++; 1834 } 1835 printf("total=%" PRIu64 "\n", (uint64_t)total); 1836 } 1837 fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n", 1838 (uint64_t)total, nalloc, nfree); 1839 exit(EXIT_SUCCESS); 1840 } 1841 #endif /* defined(UNITTEST) */ 1842