/*	$NetBSD: subr_vmem.c,v 1.76 2012/09/13 21:44:50 joerg Exp $	*/

/*-
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_vmem.c,v 1.76 2012/09/13 21:44:50 joerg Exp $");

#if defined(_KERNEL)
#include "opt_ddb.h"
#define	QCACHE
#endif /* defined(_KERNEL) */

#include <sys/param.h>
#include <sys/hash.h>
#include <sys/queue.h>
#include <sys/bitops.h>

#if defined(_KERNEL)
#include <sys/systm.h>
#include <sys/kernel.h>	/* hz */
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/vmem.h>
#include <sys/workqueue.h>
#include <sys/atomic.h>
#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_km.h>
#include <uvm/uvm_page.h>
#include <uvm/uvm_pdaemon.h>
#else /* defined(_KERNEL) */
#include "../sys/vmem.h"
#endif /* defined(_KERNEL) */


#if defined(_KERNEL)
#include <sys/evcnt.h>
#define VMEM_EVCNT_DEFINE(name) \
struct evcnt vmem_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
    "vmemev", #name); \
EVCNT_ATTACH_STATIC(vmem_evcnt_##name);
#define VMEM_EVCNT_INCR(ev)	vmem_evcnt_##ev.ev_count++
#define VMEM_EVCNT_DECR(ev)	vmem_evcnt_##ev.ev_count--

VMEM_EVCNT_DEFINE(bt_pages)
VMEM_EVCNT_DEFINE(bt_count)
VMEM_EVCNT_DEFINE(bt_inuse)

#define	LOCK_DECL(name)		\
    kmutex_t name; char lockpad[COHERENCY_UNIT - sizeof(kmutex_t)]

#define	CONDVAR_DECL(name)	\
    kcondvar_t name

#else /* defined(_KERNEL) */
#include <stdio.h>
#include <errno.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define	VMEM_EVCNT_INCR(ev)	/* nothing */
#define	VMEM_EVCNT_DECR(ev)	/* nothing */

#define	UNITTEST
#define	KASSERT(a)		assert(a)
#define	LOCK_DECL(name)		/* nothing */
#define	CONDVAR_DECL(name)	/* nothing */
#define	VMEM_CONDVAR_INIT(vm, wchan)	/* nothing */
#define	VMEM_CONDVAR_BROADCAST(vm)	/* nothing */
#define	mutex_init(a, b, c)	/* nothing */
#define	mutex_destroy(a)	/* nothing */
#define	mutex_enter(a)		/* nothing */
#define	mutex_tryenter(a)	true
#define	mutex_exit(a)		/* nothing */
#define	mutex_owned(a)		/* nothing */
#define	ASSERT_SLEEPABLE()	/* nothing */
#define	panic(...)		do { printf(__VA_ARGS__); abort(); } while (/*CONSTCOND*/0)
#endif /* defined(_KERNEL) */
struct vmem;
struct vmem_btag;

#if defined(VMEM_SANITY)
static void vmem_check(vmem_t *);
#else /* defined(VMEM_SANITY) */
#define	vmem_check(vm)	/* nothing */
#endif /* defined(VMEM_SANITY) */

#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * CHAR_BIT)

#define	VMEM_HASHSIZE_MIN	1	/* XXX */
#define	VMEM_HASHSIZE_MAX	65536	/* XXX */
#define	VMEM_HASHSIZE_INIT	1

#define	VM_FITMASK	(VM_BESTFIT | VM_INSTANTFIT)

CIRCLEQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

#if defined(QCACHE)
#define	VMEM_QCACHE_IDX_MAX	32

#define	QC_NAME_MAX	16

struct qcache {
	pool_cache_t qc_cache;
	vmem_t *qc_vmem;
	char qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool->pr_qcache))
#endif /* defined(QCACHE) */

#define	VMEM_NAME_MAX	16

/* vmem arena */
struct vmem {
	CONDVAR_DECL(vm_cv);
	LOCK_DECL(vm_lock);
	vm_flag_t vm_flags;
	vmem_import_t *vm_importfn;
	vmem_release_t *vm_releasefn;
	size_t vm_nfreetags;
	LIST_HEAD(, vmem_btag) vm_freetags;
	void *vm_arg;
	struct vmem_seglist vm_seglist;
	struct vmem_freelist vm_freelist[VMEM_MAXORDER];
	size_t vm_hashsize;
	size_t vm_nbusytag;
	struct vmem_hashlist *vm_hashlist;
	struct vmem_hashlist vm_hash0;
	size_t vm_quantum_mask;
	int vm_quantum_shift;
	size_t vm_size;
	size_t vm_inuse;
	char vm_name[VMEM_NAME_MAX+1];
	LIST_ENTRY(vmem) vm_alllist;

#if defined(QCACHE)
	/* quantum cache */
	size_t vm_qcache_max;
	struct pool_allocator vm_qcache_allocator;
	qcache_t vm_qcache_store[VMEM_QCACHE_IDX_MAX];
	qcache_t *vm_qcache[VMEM_QCACHE_IDX_MAX];
#endif /* defined(QCACHE) */
};

#define	VMEM_LOCK(vm)		mutex_enter(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mutex_tryenter(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mutex_exit(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, ipl)	mutex_init(&vm->vm_lock, MUTEX_DEFAULT, ipl)
#define	VMEM_LOCK_DESTROY(vm)	mutex_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	KASSERT(mutex_owned(&vm->vm_lock))

#if defined(_KERNEL)
#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)
#endif /* defined(_KERNEL) */

/* boundary tag */
struct vmem_btag {
	CIRCLEQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t bt_start;
	vmem_size_t bt_size;
	int bt_type;
};

#define	BT_TYPE_SPAN		1
#define	BT_TYPE_SPAN_STATIC	2
#define	BT_TYPE_FREE		3
#define	BT_TYPE_BUSY		4
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size - 1)
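/*
 * Note (explanatory addition, not part of the original comments):
 * BT_END() is the last address covered by a tag, i.e. the end is inclusive.
 * For example, a tag with bt_start == 0x1000 and bt_size == 0x100 covers
 * [0x1000, 0x10ff].  An inclusive end also avoids overflow when a span
 * reaches the very top of the address space.
 */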
typedef struct vmem_btag bt_t;

#if defined(_KERNEL)
static kmutex_t vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
#endif /* defined(_KERNEL) */

/* ---- misc */

#define	VMEM_ALIGNUP(addr, align) \
	(-(-(addr) & -(align)))

#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
#define	SIZE2ORDER(size)	((int)ilog2(size))

#if !defined(_KERNEL)
#define	xmalloc(sz, flags)	malloc(sz)
#define	xfree(p, sz)		free(p)
#define	bt_alloc(vm, flags)	malloc(sizeof(bt_t))
#define	bt_free(vm, bt)		free(bt)
#else /* defined(_KERNEL) */

#define	xmalloc(sz, flags) \
    kmem_intr_alloc(sz, ((flags) & VM_SLEEP) ? KM_SLEEP : KM_NOSLEEP)
#define	xfree(p, sz)	kmem_intr_free(p, sz)

/*
 * Memory for arenas initialized during bootstrap.
 * There is memory for STATIC_VMEM_COUNT bootstrap arenas.
 *
 * BT_RESERVE calculation:
 * we allocate memory for boundary tags with vmem, therefore we have
 * to keep a reserve of bts used to allocate memory for bts.
 * This reserve is 4 for each arena involved in allocating vmem's memory.
 * BT_MAXFREE: don't cache excessive counts of bts in arenas.
 */
#define	STATIC_VMEM_COUNT 4
#define	STATIC_BT_COUNT 200
#define	BT_MINRESERVE 4
#define	BT_MAXFREE 64
/* must be equal to or greater than the qcache multiplier for kmem_va_arena */
#define	STATIC_QC_POOL_COUNT 8

static struct vmem static_vmems[STATIC_VMEM_COUNT];
static int static_vmem_count = STATIC_VMEM_COUNT;

static struct vmem_btag static_bts[STATIC_BT_COUNT];
static int static_bt_count = STATIC_BT_COUNT;

static struct pool_cache static_qc_pools[STATIC_QC_POOL_COUNT];
static int static_qc_pool_count = STATIC_QC_POOL_COUNT;

vmem_t *kmem_va_meta_arena;
vmem_t *kmem_meta_arena;

static kmutex_t vmem_btag_lock;
static LIST_HEAD(, vmem_btag) vmem_btag_freelist;
static size_t vmem_btag_freelist_count = 0;
static size_t vmem_btag_count = STATIC_BT_COUNT;

/* ---- boundary tag */

#define	BT_PER_PAGE	(PAGE_SIZE / sizeof(bt_t))

static int bt_refill(vmem_t *vm, vm_flag_t flags);

static int
bt_refillglobal(vm_flag_t flags)
{
	vmem_addr_t va;
	bt_t *btp;
	bt_t *bt;
	int i;

	mutex_enter(&vmem_btag_lock);
	if (vmem_btag_freelist_count > 0) {
		mutex_exit(&vmem_btag_lock);
		return 0;
	}

	if (vmem_alloc(kmem_meta_arena, PAGE_SIZE,
	    (flags & ~VM_FITMASK) | VM_INSTANTFIT | VM_POPULATING, &va) != 0) {
		mutex_exit(&vmem_btag_lock);
		return ENOMEM;
	}
	VMEM_EVCNT_INCR(bt_pages);

	btp = (void *) va;
	for (i = 0; i < (BT_PER_PAGE); i++) {
		bt = btp;
		memset(bt, 0, sizeof(*bt));
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt,
		    bt_freelist);
		vmem_btag_freelist_count++;
		vmem_btag_count++;
		VMEM_EVCNT_INCR(bt_count);
		btp++;
	}
	mutex_exit(&vmem_btag_lock);

	bt_refill(kmem_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT);
	bt_refill(kmem_va_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT);
	bt_refill(kmem_meta_arena, (flags & ~VM_FITMASK) | VM_INSTANTFIT);

	return 0;
}
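/*
 * Note (explanatory addition): after handing a fresh page of tags to the
 * global freelist, bt_refillglobal() also tops up the private reserves of
 * the arenas that are themselves involved in allocating tag memory
 * (kmem_arena, kmem_va_meta_arena, kmem_meta_arena), so a later tag
 * allocation on those paths does not have to recurse while it is already
 * short of tags.  See the BT_RESERVE comment above.
 */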
static int
bt_refill(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;

	bt_refillglobal(flags);

	VMEM_LOCK(vm);
	mutex_enter(&vmem_btag_lock);
	while (!LIST_EMPTY(&vmem_btag_freelist) &&
	    vm->vm_nfreetags <= BT_MINRESERVE) {
		bt = LIST_FIRST(&vmem_btag_freelist);
		LIST_REMOVE(bt, bt_freelist);
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
		vmem_btag_freelist_count--;
	}
	mutex_exit(&vmem_btag_lock);

	if (vm->vm_nfreetags == 0) {
		VMEM_UNLOCK(vm);
		return ENOMEM;
	}
	VMEM_UNLOCK(vm);

	return 0;
}

static inline bt_t *
bt_alloc(vmem_t *vm, vm_flag_t flags)
{
	bt_t *bt;
again:
	VMEM_LOCK(vm);
	if (vm->vm_nfreetags <= BT_MINRESERVE &&
	    (flags & VM_POPULATING) == 0) {
		VMEM_UNLOCK(vm);
		if (bt_refill(vm, VM_NOSLEEP | VM_INSTANTFIT)) {
			return NULL;
		}
		goto again;
	}
	bt = LIST_FIRST(&vm->vm_freetags);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;
	VMEM_UNLOCK(vm);
	VMEM_EVCNT_INCR(bt_inuse);

	return bt;
}

static inline void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_LOCK(vm);
	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
	while (vm->vm_nfreetags > BT_MAXFREE) {
		bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		mutex_enter(&vmem_btag_lock);
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
		vmem_btag_freelist_count++;
		mutex_exit(&vmem_btag_lock);
	}
	VMEM_UNLOCK(vm);
	VMEM_EVCNT_DECR(bt_inuse);
}

#endif /* defined(_KERNEL) */

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */
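/*
 * Worked example (explanatory addition; the numbers are illustrative):
 * sizes are indexed in quanta, i.e. size >> vm_quantum_shift.  With a
 * quantum of PAGE_SIZE, a free 6-page segment has qsize 6 and
 * SIZE2ORDER(6) == 2, so bt_freehead_tofree() files it on freelist[2]
 * ([4, 7]).  An INSTANTFIT allocation of 6 pages starts searching at
 * freelist[3], where every block is at least 8 >= 6 pages; BESTFIT starts
 * one list earlier and must check bt_size explicitly.
 */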
static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	KASSERT(size != 0 && qsize != 0);
	KASSERT((size & vm->vm_quantum_mask) == 0);
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * for VM_INSTANTFIT, return the list in which any block is large enough
 * for the requested size.  otherwise, return the list which can have blocks
 * large enough for the requested size.
 */

static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, vm_flag_t strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx = SIZE2ORDER(qsize);

	KASSERT(size != 0 && qsize != 0);
	KASSERT((size & vm->vm_quantum_mask) == 0);

	if (strat == VM_INSTANTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	KASSERT(idx >= 0);
	KASSERT(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), HASH32_BUF_INIT);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	KASSERT(vm->vm_nbusytag > 0);
	vm->vm_inuse -= bt->bt_size;
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	KASSERT(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
	vm->vm_inuse += bt->bt_size;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	CIRCLEQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	CIRCLEQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	KASSERT(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

#if defined(QCACHE)
static inline vm_flag_t
prf_to_vmf(int prflags)
{
	vm_flag_t vmflags;

	KASSERT((prflags & ~(PR_LIMITFAIL | PR_WAITOK | PR_NOWAIT)) == 0);
	if ((prflags & PR_WAITOK) != 0) {
		vmflags = VM_SLEEP;
	} else {
		vmflags = VM_NOSLEEP;
	}
	return vmflags;
}

static inline int
vmf_to_prf(vm_flag_t vmflags)
{
	int prflags;

	if ((vmflags & VM_SLEEP) != 0) {
		prflags = PR_WAITOK;
	} else {
		prflags = PR_NOWAIT;
	}
	return prflags;
}

static size_t
qc_poolpage_size(size_t qcache_max)
{
	int i;

	for (i = 0; ORDER2SIZE(i) <= qcache_max * 3; i++) {
		/* nothing */
	}
	return ORDER2SIZE(i);
}

static void *
qc_poolpage_alloc(struct pool *pool, int prflags)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;
	vmem_addr_t addr;

	if (vmem_alloc(vm, pool->pr_alloc->pa_pagesz,
	    prf_to_vmf(prflags) | VM_INSTANTFIT, &addr) != 0)
		return NULL;
	return (void *)addr;
}

static void
qc_poolpage_free(struct pool *pool, void *addr)
{
	qcache_t *qc = QC_POOL_TO_QCACHE(pool);
	vmem_t *vm = qc->qc_vmem;

	vmem_free(vm, (vmem_addr_t)addr, pool->pr_alloc->pa_pagesz);
}
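/*
 * Note (explanatory addition): the quantum cache serves allocations of at
 * most qcache_max bytes from per-size pool_caches whose backing pages are
 * carved out of this arena via qc_poolpage_alloc().  For example, with a
 * quantum of PAGE_SIZE and a qcache_max of 8 * PAGE_SIZE, qc_init() below
 * sets up caches for 1..8 quanta, sharing a cache between adjacent sizes
 * whose pools would end up with the same pr_itemsperpage.
 */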
static void
qc_init(vmem_t *vm, size_t qcache_max, int ipl)
{
	qcache_t *prevqc;
	struct pool_allocator *pa;
	int qcache_idx_max;
	int i;

	KASSERT((qcache_max & vm->vm_quantum_mask) == 0);
	if (qcache_max > (VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift)) {
		qcache_max = VMEM_QCACHE_IDX_MAX << vm->vm_quantum_shift;
	}
	vm->vm_qcache_max = qcache_max;
	pa = &vm->vm_qcache_allocator;
	memset(pa, 0, sizeof(*pa));
	pa->pa_alloc = qc_poolpage_alloc;
	pa->pa_free = qc_poolpage_free;
	pa->pa_pagesz = qc_poolpage_size(qcache_max);

	qcache_idx_max = qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = qcache_idx_max; i > 0; i--) {
		qcache_t *qc = &vm->vm_qcache_store[i - 1];
		size_t size = i << vm->vm_quantum_shift;
		pool_cache_t pc;

		qc->qc_vmem = vm;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);

		if (vm->vm_flags & VM_BOOTSTRAP) {
			KASSERT(static_qc_pool_count > 0);
			pc = &static_qc_pools[--static_qc_pool_count];
			pool_cache_bootstrap(pc, size,
			    ORDER2SIZE(vm->vm_quantum_shift), 0,
			    PR_NOALIGN | PR_NOTOUCH | PR_RECURSIVE /* XXX */,
			    qc->qc_name, pa, ipl, NULL, NULL, NULL);
		} else {
			pc = pool_cache_init(size,
			    ORDER2SIZE(vm->vm_quantum_shift), 0,
			    PR_NOALIGN | PR_NOTOUCH /* XXX */,
			    qc->qc_name, pa, ipl, NULL, NULL, NULL);
		}
		qc->qc_cache = pc;
		KASSERT(qc->qc_cache != NULL);	/* XXX */
		if (prevqc != NULL &&
		    qc->qc_cache->pc_pool.pr_itemsperpage ==
		    prevqc->qc_cache->pc_pool.pr_itemsperpage) {
			if (vm->vm_flags & VM_BOOTSTRAP) {
				pool_cache_bootstrap_destroy(pc);
				//static_qc_pool_count++;
			} else {
				pool_cache_destroy(qc->qc_cache);
			}
			vm->vm_qcache[i - 1] = prevqc;
			continue;
		}
		qc->qc_cache->pc_pool.pr_qcache = qc;
		vm->vm_qcache[i - 1] = qc;
		prevqc = qc;
	}
}

static void
qc_destroy(vmem_t *vm)
{
	const qcache_t *prevqc;
	int i;
	int qcache_idx_max;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	prevqc = NULL;
	for (i = 0; i < qcache_idx_max; i++) {
		qcache_t *qc = vm->vm_qcache[i];

		if (prevqc == qc) {
			continue;
		}
		if (vm->vm_flags & VM_BOOTSTRAP) {
			pool_cache_bootstrap_destroy(qc->qc_cache);
		} else {
			pool_cache_destroy(qc->qc_cache);
		}
		prevqc = qc;
	}
}
#endif

#if defined(_KERNEL)
void
vmem_bootstrap(void)
{

	mutex_init(&vmem_list_lock, MUTEX_DEFAULT, IPL_VM);
	mutex_init(&vmem_btag_lock, MUTEX_DEFAULT, IPL_VM);

	while (static_bt_count-- > 0) {
		bt_t *bt = &static_bts[static_bt_count];
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
		VMEM_EVCNT_INCR(bt_count);
		vmem_btag_freelist_count++;
	}
}

void
vmem_init(vmem_t *vm)
{

	kmem_va_meta_arena = vmem_create("vmem-va", 0, 0, PAGE_SIZE,
	    vmem_alloc, vmem_free, vm,
	    0, VM_NOSLEEP | VM_BOOTSTRAP | VM_LARGEIMPORT,
	    IPL_VM);

	kmem_meta_arena = vmem_create("vmem-meta", 0, 0, PAGE_SIZE,
	    uvm_km_kmem_alloc, uvm_km_kmem_free, kmem_va_meta_arena,
	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
}
#endif /* defined(_KERNEL) */
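/*
 * Note (explanatory addition): vmem_init() wires up the metadata arenas.
 * kmem_va_meta_arena imports virtual address space from the arena passed
 * in (in practice the kernel kmem arena) through vmem_alloc/vmem_free,
 * in large chunks because of VM_LARGEIMPORT; kmem_meta_arena in turn
 * imports from kmem_va_meta_arena through uvm_km_kmem_alloc/free, which
 * back the addresses with pages, and is what bt_refillglobal() allocates
 * boundary-tag pages from.
 */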
static int
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags,
    int spanbttype)
{
	bt_t *btspan;
	bt_t *btfree;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(spanbttype == BT_TYPE_SPAN ||
	    spanbttype == BT_TYPE_SPAN_STATIC);

	btspan = bt_alloc(vm, flags);
	if (btspan == NULL) {
		return ENOMEM;
	}
	btfree = bt_alloc(vm, flags);
	if (btfree == NULL) {
		bt_free(vm, btspan);
		return ENOMEM;
	}

	btspan->bt_type = spanbttype;
	btspan->bt_start = addr;
	btspan->bt_size = size;

	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;

	VMEM_LOCK(vm);
	bt_insseg_tail(vm, btspan);
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);
	vm->vm_size += size;
	VMEM_UNLOCK(vm);

	return 0;
}

static void
vmem_destroy1(vmem_t *vm)
{

#if defined(QCACHE)
	qc_destroy(vm);
#endif /* defined(QCACHE) */
	if (vm->vm_hashlist != NULL) {
		int i;

		for (i = 0; i < vm->vm_hashsize; i++) {
			bt_t *bt;

			while ((bt = LIST_FIRST(&vm->vm_hashlist[i])) != NULL) {
				KASSERT(bt->bt_type == BT_TYPE_SPAN_STATIC);
				bt_free(vm, bt);
			}
		}
		if (vm->vm_hashlist != &vm->vm_hash0) {
			xfree(vm->vm_hashlist,
			    sizeof(struct vmem_hashlist *) * vm->vm_hashsize);
		}
	}

	while (vm->vm_nfreetags > 0) {
		bt_t *bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		mutex_enter(&vmem_btag_lock);
#if defined (_KERNEL)
		LIST_INSERT_HEAD(&vmem_btag_freelist, bt, bt_freelist);
		vmem_btag_freelist_count++;
#endif /* defined(_KERNEL) */
		mutex_exit(&vmem_btag_lock);
	}

	VMEM_LOCK_DESTROY(vm);
	xfree(vm, sizeof(*vm));
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, vm_flag_t flags)
{
	vmem_addr_t addr;
	int rc;

	if (vm->vm_importfn == NULL) {
		return EINVAL;
	}

	if (vm->vm_flags & VM_LARGEIMPORT) {
		size *= 8;
	}

	if (vm->vm_flags & VM_XIMPORT) {
		rc = ((vmem_ximport_t *)vm->vm_importfn)(vm->vm_arg, size,
		    &size, flags, &addr);
	} else {
		rc = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
	}
	if (rc) {
		return ENOMEM;
	}

	if (vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN) != 0) {
		(*vm->vm_releasefn)(vm->vm_arg, addr, size);
		return ENOMEM;
	}

	return 0;
}

static int
vmem_rehash(vmem_t *vm, size_t newhashsize, vm_flag_t flags)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	size_t oldhashsize;

	KASSERT(newhashsize > 0);

	newhashlist =
	    xmalloc(sizeof(struct vmem_hashlist *) * newhashsize, flags);
	if (newhashlist == NULL) {
		return ENOMEM;
	}
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	if (!VMEM_TRYLOCK(vm)) {
		xfree(newhashlist,
		    sizeof(struct vmem_hashlist *) * newhashsize);
		return EBUSY;
	}
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt); /* XXX */
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	if (oldhashlist != &vm->vm_hash0) {
		xfree(oldhashlist,
		    sizeof(struct vmem_hashlist *) * oldhashsize);
	}

	return 0;
}
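/*
 * Note (explanatory addition): vmem_rehash() only tries the arena lock; if
 * the arena is busy it frees the new table again and returns EBUSY, so the
 * periodic rehash worker further below simply skips that arena and retries
 * on a later pass instead of blocking.
 */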
/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * it's the caller's responsibility to ensure the region is big enough
 * before calling us.
 */

static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross,
    vmem_addr_t minaddr, vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	KASSERT(size > 0);
	KASSERT(bt->bt_size >= size); /* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integer of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr) {
		end = maxaddr;
	}
	if (start > end) {
		return ENOMEM;
	}

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start) {
		start += align;
	}
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		KASSERT(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start <= end && end - start >= size - 1) {
		KASSERT((start & (align - 1)) == phase);
		KASSERT(!VMEM_CROSS_P(start, start + size - 1, nocross));
		KASSERT(minaddr <= start);
		KASSERT(maxaddr == 0 || start + size - 1 <= maxaddr);
		KASSERT(bt->bt_start <= start);
		KASSERT(BT_END(bt) - start >= size - 1);
		*addrp = start;
		return 0;
	}
	return ENOMEM;
}


/*
 * vmem_create_internal: creates a vmem arena.
 */

static vmem_t *
vmem_create_internal(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
    void *arg, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{
	vmem_t *vm = NULL;
	int i;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT(quantum > 0);

	if (flags & VM_BOOTSTRAP) {
#if defined(_KERNEL)
		KASSERT(static_vmem_count > 0);
		vm = &static_vmems[--static_vmem_count];
#endif /* defined(_KERNEL) */
	} else {
		vm = xmalloc(sizeof(*vm), flags);
	}
	if (vm == NULL) {
		return NULL;
	}

	VMEM_CONDVAR_INIT(vm, "vmem");
	VMEM_LOCK_INIT(vm, ipl);
	vm->vm_flags = flags;
	vm->vm_nfreetags = 0;
	LIST_INIT(&vm->vm_freetags);
	strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = SIZE2ORDER(quantum);
	KASSERT(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_importfn = importfn;
	vm->vm_releasefn = releasefn;
	vm->vm_arg = arg;
	vm->vm_nbusytag = 0;
	vm->vm_size = 0;
	vm->vm_inuse = 0;
#if defined(QCACHE)
	qc_init(vm, qcache_max, ipl);
#endif /* defined(QCACHE) */

	CIRCLEQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	vm->vm_hashlist = NULL;
	if (flags & VM_BOOTSTRAP) {
		vm->vm_hashsize = 1;
		vm->vm_hashlist = &vm->vm_hash0;
	} else if (vmem_rehash(vm, VMEM_HASHSIZE_INIT, flags)) {
		vmem_destroy1(vm);
		return NULL;
	}

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) != 0) {
			vmem_destroy1(vm);
			return NULL;
		}
	}

#if defined(_KERNEL)
	if (flags & VM_BOOTSTRAP) {
		bt_refill(vm, VM_NOSLEEP);
	}

	mutex_enter(&vmem_list_lock);
	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	return vm;
}


/* ---- vmem API */
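/*
 * Usage sketch (explanatory addition; the names and sizes below are purely
 * illustrative, not taken from kernel code):
 *
 *	vmem_t *arena;
 *	vmem_addr_t addr;
 *
 *	arena = vmem_create("example", 0x1000, 0x10000, 1,
 *	    NULL, NULL, NULL, 0, VM_SLEEP, IPL_NONE);
 *	if (vmem_alloc(arena, 64, VM_INSTANTFIT | VM_SLEEP, &addr) == 0) {
 *		...
 *		vmem_free(arena, addr, 64);
 *	}
 *	vmem_destroy(arena);
 *
 * Passing an importfn/releasefn and a source arena instead of a static
 * [base, base + size) range makes the arena grow on demand, as the
 * bootstrap arenas set up in vmem_init() do.
 */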
/*
 * vmem_create: create an arena.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_import_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_create_internal(name, base, size, quantum,
	    importfn, releasefn, source, qcache_max, flags, ipl);
}

/*
 * vmem_xcreate: create an arena that takes an alternative import function.
 *
 * => must not be called from interrupt context.
 */

vmem_t *
vmem_xcreate(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_ximport_t *importfn, vmem_release_t *releasefn,
    vmem_t *source, vmem_size_t qcache_max, vm_flag_t flags, int ipl)
{

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((flags & (VM_XIMPORT)) == 0);

	return vmem_create_internal(name, base, size, quantum,
	    (vmem_import_t *)importfn, releasefn, source,
	    qcache_max, flags | VM_XIMPORT, ipl);
}

void
vmem_destroy(vmem_t *vm)
{

#if defined(_KERNEL)
	mutex_enter(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mutex_exit(&vmem_list_lock);
#endif /* defined(_KERNEL) */

	vmem_destroy1(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}

/*
 * vmem_alloc:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

int
vmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags, vmem_addr_t *addrp)
{
	const vm_flag_t strat __unused = flags & VM_FITMASK;

	KASSERT((flags & (VM_SLEEP|VM_NOSLEEP)) != 0);
	KASSERT((~flags & (VM_SLEEP|VM_NOSLEEP)) != 0);

	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE();
	}

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		void *p;
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		p = pool_cache_get(qc->qc_cache, vmf_to_prf(flags));
		if (addrp != NULL)
			*addrp = (vmem_addr_t)p;
		return (p == NULL) ? ENOMEM : 0;
	}
#endif /* defined(QCACHE) */

	return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
	    flags, addrp);
}
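/*
 * Note (explanatory addition): the quantum-cache fast path above rounds the
 * request up to whole quanta to pick the per-size cache; vmem_free()
 * performs the same rounding, so a region obtained through the cache is
 * expected to be freed with the same size.  Requests larger than
 * vm_qcache_max fall through to the general-purpose vmem_xalloc() path.
 */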
int
vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross,
    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, const vm_flag_t flags,
    vmem_addr_t *addrp)
{
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	bt_t *bt;
	bt_t *btnew;
	bt_t *btnew2;
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	vm_flag_t strat = flags & VM_FITMASK;
	vmem_addr_t start;
	int rc;

	KASSERT(size0 > 0);
	KASSERT(size > 0);
	KASSERT(strat == VM_BESTFIT || strat == VM_INSTANTFIT);
	if ((flags & VM_SLEEP) != 0) {
		ASSERT_SLEEPABLE();
	}
	KASSERT((align & vm->vm_quantum_mask) == 0);
	KASSERT((align & (align - 1)) == 0);
	KASSERT((phase & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & vm->vm_quantum_mask) == 0);
	KASSERT((nocross & (nocross - 1)) == 0);
	KASSERT((align == 0 && phase == 0) || phase < align);
	KASSERT(nocross == 0 || nocross >= size);
	KASSERT(minaddr <= maxaddr);
	KASSERT(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	if (align == 0) {
		align = vm->vm_quantum_mask + 1;
	}

	/*
	 * allocate boundary tags before acquiring the vmem lock.
	 */
	btnew = bt_alloc(vm, flags);
	if (btnew == NULL) {
		return ENOMEM;
	}
	btnew2 = bt_alloc(vm, flags); /* XXX not necessary if no restrictions */
	if (btnew2 == NULL) {
		bt_free(vm, btnew);
		return ENOMEM;
	}

	/*
	 * choose a free block from which we allocate.
	 */
retry_strat:
	first = bt_freehead_toalloc(vm, size, strat);
	end = &vm->vm_freelist[VMEM_MAXORDER];
retry:
	bt = NULL;
	VMEM_LOCK(vm);
	vmem_check(vm);
	if (strat == VM_INSTANTFIT) {
		/*
		 * just choose the first block which satisfies our restrictions.
		 *
		 * note that we don't need to check the size of the blocks
		 * because any block found on these lists should be larger than
		 * the given size.
		 */
		for (list = first; list < end; list++) {
			bt = LIST_FIRST(list);
			if (bt != NULL) {
				rc = vmem_fit(bt, size, align, phase,
				    nocross, minaddr, maxaddr, &start);
				if (rc == 0) {
					goto gotit;
				}
				/*
				 * don't bother to follow the bt_freelist link
				 * here.  the list can be very long and we are
				 * told to run fast.  blocks from the later free
				 * lists are larger and have better chances to
				 * satisfy our restrictions.
				 */
			}
		}
	} else { /* VM_BESTFIT */
		/*
		 * we assume that, for space efficiency, it's better to
		 * allocate from a smaller block.  thus we will start searching
		 * from a lower-order list than VM_INSTANTFIT would.
		 * however, don't bother to find the smallest block in a free
		 * list because the list can be very long.  we can revisit it
		 * if/when it turns out to be a problem.
		 *
		 * note that the 'first' list can contain blocks smaller than
		 * the requested size.  thus we need to check bt_size.
		 */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					rc = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr, &start);
					if (rc == 0) {
						goto gotit;
					}
				}
			}
		}
	}
	VMEM_UNLOCK(vm);
#if 1
	if (strat == VM_INSTANTFIT) {
		strat = VM_BESTFIT;
		goto retry_strat;
	}
#endif
	if (align != vm->vm_quantum_mask + 1 || phase != 0 || nocross != 0) {

		/*
		 * XXX should try to import a region large enough to
		 * satisfy restrictions?
		 */

		goto fail;
	}
	/* XXX eeek, minaddr & maxaddr not respected */
	if (vmem_import(vm, size, flags) == 0) {
		goto retry;
	}
	/* XXX */

	if ((flags & VM_SLEEP) != 0) {
#if defined(_KERNEL) && !defined(_RUMPKERNEL)
		mutex_spin_enter(&uvm_fpageqlock);
		uvm_kick_pdaemon();
		mutex_spin_exit(&uvm_fpageqlock);
#endif
		VMEM_LOCK(vm);
		VMEM_CONDVAR_WAIT(vm);
		VMEM_UNLOCK(vm);
		goto retry;
	}
fail:
	bt_free(vm, btnew);
	bt_free(vm, btnew2);
	return ENOMEM;

gotit:
	KASSERT(bt->bt_type == BT_TYPE_FREE);
	KASSERT(bt->bt_size >= size);
	bt_remfree(vm, bt);
	vmem_check(vm);
	if (bt->bt_start != start) {
		btnew2->bt_type = BT_TYPE_FREE;
		btnew2->bt_start = bt->bt_start;
		btnew2->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btnew2->bt_size;
		bt_insfree(vm, btnew2);
		bt_insseg(vm, btnew2, CIRCLEQ_PREV(bt, bt_seglist));
		btnew2 = NULL;
		vmem_check(vm);
	}
	KASSERT(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew, CIRCLEQ_PREV(bt, bt_seglist));
		bt_insbusy(vm, btnew);
		vmem_check(vm);
		VMEM_UNLOCK(vm);
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
		vmem_check(vm);
		VMEM_UNLOCK(vm);
		bt_free(vm, btnew);
		btnew = bt;
	}
	if (btnew2 != NULL) {
		bt_free(vm, btnew2);
	}
	KASSERT(btnew->bt_size >= size);
	btnew->bt_type = BT_TYPE_BUSY;

	if (addrp != NULL)
		*addrp = btnew->bt_start;
	return 0;
}
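/*
 * Note (explanatory addition): vmem_xalloc() pre-allocates two boundary tags
 * before taking the arena lock.  btnew2 becomes the free remainder in front
 * of the chosen range when alignment/phase push the start forward; btnew
 * becomes the busy tag when the chosen block has to be split.  Whichever tag
 * ends up unused is simply returned with bt_free().
 */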
/*
 * vmem_free:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	KASSERT(size > 0);

#if defined(QCACHE)
	if (size <= vm->vm_qcache_max) {
		int qidx = (size + vm->vm_quantum_mask) >> vm->vm_quantum_shift;
		qcache_t *qc = vm->vm_qcache[qidx - 1];

		pool_cache_put(qc->qc_cache, (void *)addr);
		return;
	}
#endif /* defined(QCACHE) */

	vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;
	LIST_HEAD(, vmem_btag) tofree;

	LIST_INIT(&tofree);

	KASSERT(size > 0);

	VMEM_LOCK(vm);

	bt = bt_lookupbusy(vm, addr);
	KASSERT(bt != NULL);
	KASSERT(bt->bt_start == addr);
	KASSERT(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	KASSERT(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = CIRCLEQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(bt) < t->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
	}
	t = CIRCLEQ_PREV(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		KASSERT(BT_END(t) < bt->bt_start);	/* YYY */
		bt_remfree(vm, t);
		bt_remseg(vm, t);
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
	}

	t = CIRCLEQ_PREV(bt, bt_seglist);
	KASSERT(t != NULL);
	KASSERT(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		KASSERT(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		LIST_INSERT_HEAD(&tofree, bt, bt_freelist);
		bt_remseg(vm, t);
		LIST_INSERT_HEAD(&tofree, t, bt_freelist);
		vm->vm_size -= spansize;
		VMEM_CONDVAR_BROADCAST(vm);
		VMEM_UNLOCK(vm);
		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_CONDVAR_BROADCAST(vm);
		VMEM_UNLOCK(vm);
	}

	while (!LIST_EMPTY(&tofree)) {
		t = LIST_FIRST(&tofree);
		LIST_REMOVE(t, bt_freelist);
		bt_free(vm, t);
	}
}
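/*
 * Note (explanatory addition): vmem_xfree() coalesces the freed range with
 * free neighbours, and if the result exactly covers an imported span it
 * hands the whole span back through vm_releasefn.  Tags that become
 * redundant are collected on the local 'tofree' list and only released with
 * bt_free() after the arena lock has been dropped, since bt_free() takes
 * VMEM_LOCK() itself.
 */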
/*
 * vmem_add:
 *
 * => caller must ensure appropriate spl,
 *    if the arena can be accessed from interrupt context.
 */

int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, vm_flag_t flags)
{

	return vmem_add1(vm, addr, size, flags, BT_TYPE_SPAN_STATIC);
}

/*
 * vmem_size: information about an arena's size
 *
 * => return the free/allocated size in the arena
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{

	switch (typemask) {
	case VMEM_ALLOC:
		return vm->vm_inuse;
	case VMEM_FREE:
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		return vm->vm_size;
	default:
		panic("vmem_size");
	}
}

/* ---- rehash */

#if defined(_KERNEL)
static struct callout vmem_rehash_ch;
static int vmem_rehash_interval;
static struct workqueue *vmem_rehash_wq;
static struct work vmem_rehash_wk;

static void
vmem_rehash_all(struct work *wk, void *dummy)
{
	vmem_t *vm;

	KASSERT(wk == &vmem_rehash_wk);
	mutex_enter(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		size_t desired;
		size_t current;

		if (!VMEM_TRYLOCK(vm)) {
			continue;
		}
		desired = vm->vm_nbusytag;
		current = vm->vm_hashsize;
		VMEM_UNLOCK(vm);

		if (desired > VMEM_HASHSIZE_MAX) {
			desired = VMEM_HASHSIZE_MAX;
		} else if (desired < VMEM_HASHSIZE_MIN) {
			desired = VMEM_HASHSIZE_MIN;
		}
		if (desired > current * 2 || desired * 2 < current) {
			vmem_rehash(vm, desired, VM_NOSLEEP);
		}
	}
	mutex_exit(&vmem_list_lock);

	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}

static void
vmem_rehash_all_kick(void *dummy)
{

	workqueue_enqueue(vmem_rehash_wq, &vmem_rehash_wk, NULL);
}

void
vmem_rehash_start(void)
{
	int error;

	error = workqueue_create(&vmem_rehash_wq, "vmem_rehash",
	    vmem_rehash_all, NULL, PRI_VM, IPL_SOFTCLOCK, WQ_MPSAFE);
	if (error) {
		panic("%s: workqueue_create %d\n", __func__, error);
	}
	callout_init(&vmem_rehash_ch, CALLOUT_MPSAFE);
	callout_setfunc(&vmem_rehash_ch, vmem_rehash_all_kick, NULL);

	vmem_rehash_interval = hz * 10;
	callout_schedule(&vmem_rehash_ch, vmem_rehash_interval);
}
#endif /* defined(_KERNEL) */

/* ---- debug */

#if defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY)

static void bt_dump(const bt_t *, void (*)(const char *, ...));

static const char *
bt_type_string(int type)
{
	static const char * const table[] = {
		[BT_TYPE_BUSY] = "busy",
		[BT_TYPE_FREE] = "free",
		[BT_TYPE_SPAN] = "span",
		[BT_TYPE_SPAN_STATIC] = "static span",
	};

	if (type >= __arraycount(table)) {
		return "BOGUS";
	}
	return table[type];
}

static void
bt_dump(const bt_t *bt, void (*pr)(const char *, ...))
{

	(*pr)("\t%p: %" PRIu64 ", %" PRIu64 ", %d(%s)\n",
	    bt, (uint64_t)bt->bt_start, (uint64_t)bt->bt_size,
	    bt->bt_type, bt_type_string(bt->bt_type));
}

static void
vmem_dump(const vmem_t *vm, void (*pr)(const char *, ...))
{
	const bt_t *bt;
	int i;

	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt, pr);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		(*pr)("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt, pr);
		}
	}
}

#endif /* defined(DDB) || defined(UNITTEST) || defined(VMEM_SANITY) */

#if defined(DDB)
static bt_t *
vmem_whatis_lookup(vmem_t *vm, uintptr_t addr)
{
	bt_t *bt;

	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (BT_ISSPAN_P(bt)) {
			continue;
		}
		if (bt->bt_start <= addr && addr <= BT_END(bt)) {
			return bt;
		}
	}

	return NULL;
}

void
vmem_whatis(uintptr_t addr, void (*pr)(const char *, ...))
{
	vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		bt_t *bt;

		bt = vmem_whatis_lookup(vm, addr);
		if (bt == NULL) {
			continue;
		}
		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
		    (void *)addr, (void *)bt->bt_start,
		    (size_t)(addr - bt->bt_start), vm->vm_name,
		    (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
	}
}

void
vmem_printall(const char *modif, void (*pr)(const char *, ...))
{
	const vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		vmem_dump(vm, pr);
	}
}

void
vmem_print(uintptr_t addr, const char *modif, void (*pr)(const char *, ...))
{
	const vmem_t *vm = (const void *)addr;

	vmem_dump(vm, pr);
}
#endif /* defined(DDB) */

#if defined(_KERNEL)
#define vmem_printf printf
#else
#include <stdio.h>
#include <stdarg.h>

static void
vmem_printf(const char *fmt, ...)
{
	va_list ap;
	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
#endif

#if defined(VMEM_SANITY)

static bool
vmem_check_sanity(vmem_t *vm)
{
	const bt_t *bt, *bt2;

	KASSERT(vm != NULL);

	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (bt->bt_start > BT_END(bt)) {
			printf("corrupted tag\n");
			bt_dump(bt, vmem_printf);
			return false;
		}
	}
	CIRCLEQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		CIRCLEQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) {
			if (bt == bt2) {
				continue;
			}
			if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) {
				continue;
			}
			if (bt->bt_start <= BT_END(bt2) &&
			    bt2->bt_start <= BT_END(bt)) {
				printf("overlapping tags\n");
				bt_dump(bt, vmem_printf);
				bt_dump(bt2, vmem_printf);
				return false;
			}
		}
	}

	return true;
}

static void
vmem_check(vmem_t *vm)
{

	if (!vmem_check_sanity(vm)) {
		panic("insanity vmem %p", vm);
	}
}

#endif /* defined(VMEM_SANITY) */

#if defined(UNITTEST)
int
main(void)
{
	int rc;
	vmem_t *vm;
	vmem_addr_t p;
	struct reg {
		vmem_addr_t p;
		vmem_size_t sz;
		bool x;
	} *reg = NULL;
	int nreg = 0;
	int nalloc = 0;
	int nfree = 0;
	vmem_size_t total = 0;
#if 1
	vm_flag_t strat = VM_INSTANTFIT;
#else
	vm_flag_t strat = VM_BESTFIT;
#endif

	vm = vmem_create("test", 0, 0, 1, NULL, NULL, NULL, 0, VM_SLEEP,
#ifdef _KERNEL
	    IPL_NONE
#else
	    0
#endif
	    );
	if (vm == NULL) {
		printf("vmem_create\n");
		exit(EXIT_FAILURE);
	}
	vmem_dump(vm, vmem_printf);

	rc = vmem_add(vm, 0, 50, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 100, 200, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 2000, 1, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 40000, 65536, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 10000, 10000, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 500, 1000, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_add(vm, 0xffffff00, 0x100, VM_SLEEP);
	assert(rc == 0);
	rc = vmem_xalloc(vm, 0x101, 0, 0, 0,
	    0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 50, 0, 0, 0, 0, 49, strat|VM_SLEEP, &p);
	assert(rc == 0 && p == 0);
	vmem_xfree(vm, p, 50);
	rc = vmem_xalloc(vm, 25, 0, 0, 0, 0, 24, strat|VM_SLEEP, &p);
	assert(rc == 0 && p == 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff01, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff00, 0xfffffffe, strat|VM_SLEEP, &p);
	assert(rc != 0);
	rc = vmem_xalloc(vm, 0x100, 0, 0, 0,
	    0xffffff00, 0xffffffff, strat|VM_SLEEP, &p);
	assert(rc == 0);
	vmem_dump(vm, vmem_printf);
	for (;;) {
		struct reg *r;
		int t = rand() % 100;

		if (t > 45) {
			/* alloc */
			vmem_size_t sz = rand() % 500 + 1;
			bool x;
			vmem_size_t align, phase, nocross;
			vmem_addr_t minaddr, maxaddr;

			if (t > 70) {
				x = true;
				/* XXX */
				align = 1 << (rand() % 15);
				phase = rand() % 65536;
				nocross = 1 << (rand() % 15);
				if (align <= phase) {
					phase = 0;
				}
				if (VMEM_CROSS_P(phase, phase + sz - 1,
				    nocross)) {
					nocross = 0;
				}
				do {
					minaddr = rand() % 50000;
					maxaddr = rand() % 70000;
				} while (minaddr > maxaddr);
				printf("=== xalloc %" PRIu64
				    " align=%" PRIu64 ", phase=%" PRIu64
				    ", nocross=%" PRIu64 ", min=%" PRIu64
				    ", max=%" PRIu64 "\n",
				    (uint64_t)sz,
				    (uint64_t)align,
				    (uint64_t)phase,
				    (uint64_t)nocross,
				    (uint64_t)minaddr,
				    (uint64_t)maxaddr);
				rc = vmem_xalloc(vm, sz, align, phase, nocross,
				    minaddr, maxaddr, strat|VM_SLEEP, &p);
			} else {
				x = false;
				printf("=== alloc %" PRIu64 "\n", (uint64_t)sz);
				rc = vmem_alloc(vm, sz, strat|VM_SLEEP, &p);
			}
			printf("-> %" PRIu64 "\n", (uint64_t)p);
			vmem_dump(vm, vmem_printf);
			if (rc != 0) {
				if (x) {
					continue;
				}
				break;
			}
			nreg++;
			reg = realloc(reg, sizeof(*reg) * nreg);
			r = &reg[nreg - 1];
			r->p = p;
			r->sz = sz;
			r->x = x;
			total += sz;
			nalloc++;
		} else if (nreg != 0) {
			/* free */
			r = &reg[rand() % nreg];
			printf("=== free %" PRIu64 ", %" PRIu64 "\n",
			    (uint64_t)r->p, (uint64_t)r->sz);
			if (r->x) {
				vmem_xfree(vm, r->p, r->sz);
			} else {
				vmem_free(vm, r->p, r->sz);
			}
			total -= r->sz;
			vmem_dump(vm, vmem_printf);
			*r = reg[nreg - 1];
			nreg--;
			nfree++;
		}
		printf("total=%" PRIu64 "\n", (uint64_t)total);
	}
	fprintf(stderr, "total=%" PRIu64 ", nalloc=%d, nfree=%d\n",
	    (uint64_t)total, nalloc, nfree);
	exit(EXIT_SUCCESS);
}
#endif /* defined(UNITTEST) */
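/*
 * Note (explanatory addition): UNITTEST is defined automatically in the
 * !_KERNEL branch near the top of this file, so the harness above is what
 * runs when the file is compiled outside the kernel against the userland
 * stubs, e.g. (illustrative command line only; adjust the include path so
 * "../sys/vmem.h" resolves in your tree):
 *
 *	cc -g -o vmemtest subr_vmem.c
 */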