/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com> and by
 * Venkatesh Srinivas <me@endeavour.zapto.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * are nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 *	Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 *	is used to locate them for free().  One- and two-page allocations use
 *	the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 *			API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that power-of-2 sized allocations are page-aligned.  Non
 *	power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
 *
 * Multithreaded enhancements for small allocations introduced August 2010.
 * These are in the spirit of 'libumem'.  See:
 *	Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
 *	slab allocator to many CPUs and arbitrary resources".  In Proc. 2001
 *	USENIX Technical Conference.  USENIX Association.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.
 *
 * 'U'   / ['u']	Generate / do not generate utrace entries for ktrace(1)
 *			This will generate utrace events for all malloc,
 *			realloc, and free calls.  There are tools (mtrplay) to
 *			replay an allocation pattern or to graph heap structure
 *			(mtrgraph) which can interpret these logs.
 * 'Z'   / ['z']	Zero out / do not zero all allocations.
 *			Each new byte of memory allocated by malloc, realloc, or
 *			reallocf will be initialized to 0.  This is intended for
 *			debugging and will affect performance negatively.
 * 'H'   / ['h']	Pass a hint to the kernel about pages unused by the
 *			allocation functions.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */

#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>

#include "spinlock.h"
#include "un-namespace.h"

static char rcsid[] = "$Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 sv5679 Exp $";
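
/*
 * Illustrative, compiled-out usage sketch (not part of the allocator).
 * It exercises the behaviors documented in the header comment above:
 * power-of-2 allocations up to a page are power-of-2 aligned, malloc(0)
 * returns a special non-NULL cookie (ZERO_LENGTH_PTR, defined below), and
 * MALLOC_OPTIONS (e.g. "ZH" in the environment) tunes the allocator.  The
 * function name is hypothetical and nothing calls it.
 */
#if 0
static void
nmalloc_usage_example(void)
{
	void *p, *q, *r;

	p = malloc(256);		/* 256-byte aligned per the notes above */
	if (p != NULL && ((uintptr_t)p & 255) != 0)
		fprintf(stderr, "unexpected alignment %p\n", p);

	q = malloc(0);			/* special non-NULL zero-length pointer */
	r = realloc(q, 100);		/* may be served from the 104-byte zone */

	free(r);
	free(p);
}
#endif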

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	bytes;		/* bytes allocated */
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
	int32_t		z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone	*z_Next;	/* ZoneAry[] link if z_NFree non-zero */
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
#if defined(INVARIANTS)
	__uint32_t	z_Bitmap[];	/* bitmap of free chunks / sanity */
#endif
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

#define FASTSLABREALLOC		0x02
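
/*
 * Illustrative, compiled-out sketch (not part of the allocator): because
 * every zone is allocated on a ZALLOC_ZONE_SIZE boundary, the in-band
 * header of the zone owning a small chunk can be recovered by masking the
 * chunk pointer, which is exactly what _slabfree() and _slabrealloc() do
 * below.  The helper name is hypothetical.
 */
#if 0
static slzone_t
example_ptr_to_zone(void *ptr)
{
	/* round the chunk address down to the enclosing 64KB zone */
	return ((slzone_t)((uintptr_t)ptr &
	    ~(uintptr_t)(ZALLOC_ZONE_SIZE - 1)));
}
#endif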

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)
#define ZERO_LENGTH_PTR	((void *)&malloc_dummy_pointer)

#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

#define SAFLAG_ZERO	0x0001
#define SAFLAG_PASSIVE	0x0002

/*
 * Thread control
 */

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			    } while (0)

/*
 * Magazines
 */

#define M_MAX_ROUNDS	64
#define M_ZONE_ROUNDS	64
#define M_LOW_ROUNDS	32
#define M_INIT_ROUNDS	8
#define M_BURST_FACTOR	8
#define M_BURST_NSCALE	2

#define M_BURST		0x0001
#define M_BURST_EARLY	0x0002

struct magazine {
	SLIST_ENTRY(magazine) nextmagazine;

	int		flags;
	int		capacity;	/* Max rounds in this magazine */
	int		rounds;		/* Current number of free rounds */
	int		burst_factor;	/* Number of blocks to prefill with */
	int		low_factor;	/* Free till low_factor from full mag */
	void		*objects[M_MAX_ROUNDS];
};

SLIST_HEAD(magazinelist, magazine);

static spinlock_t zone_mag_lock;
static struct magazine zone_magazine = {
	.flags = M_BURST | M_BURST_EARLY,
	.capacity = M_ZONE_ROUNDS,
	.rounds = 0,
	.burst_factor = M_BURST_FACTOR,
	.low_factor = M_LOW_ROUNDS
};

#define MAGAZINE_FULL(mp)	(mp->rounds == mp->capacity)
#define MAGAZINE_NOTFULL(mp)	(mp->rounds < mp->capacity)
#define MAGAZINE_EMPTY(mp)	(mp->rounds == 0)
#define MAGAZINE_NOTEMPTY(mp)	(mp->rounds != 0)

/*
 * Each thread will have a pair of magazines per size-class (NZONES).
 * The loaded magazine will support immediate allocations; the previous
 * magazine will either be full or empty and can be swapped at need.
 */
typedef struct magazine_pair {
	struct magazine	*loaded;
	struct magazine	*prev;
} magazine_pair;

/* A depot is a collection of magazines for a single zone. */
typedef struct magazine_depot {
	struct magazinelist full;
	struct magazinelist empty;
	pthread_spinlock_t lock;
} magazine_depot;

typedef struct thr_mags {
	magazine_pair	mags[NZONES];
	int		init;
} thr_mags;

/*
 * With this attribute set, do not require a function call for accessing
 * this variable when the code is compiled -fPIC.
 */
#define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")))

static int mtmagazine_free_live;
static __thread thr_mags thread_mags TLS_ATTRIBUTE;
static pthread_key_t thread_mags_key;
static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT;
static magazine_depot depots[NZONES];

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static int opt_madvise = 0;
static int opt_utrace = 0;
static int malloc_started = 0;
static int g_malloc_flags = 0;
static spinlock_t malloc_init_lock;
static struct slglobaldata SLGlobalData;
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static int malloc_panic;
static int malloc_dummy_pointer;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr, int, bigalloc_t *);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void *magazine_alloc(struct magazine *, int *);
static int magazine_free(struct magazine *, void *);
static void *mtmagazine_alloc(int zi);
static int mtmagazine_free(int zi, void *);
static void mtmagazine_init(void);
static void mtmagazine_destructor(void *);
static slzone_t zone_alloc(int flags);
static void zone_free(void *z);
static void _mpanic(const char *ctl, ...);
static void malloc_init(void);
#if defined(INVARIANTS)
static void chunk_mark_allocated(slzone_t z, void *chunk);
static void chunk_mark_free(slzone_t z, void *chunk);
#endif

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int  use_malloc_pattern;
#endif

static void
malloc_init(void)
{
	const char *p = NULL;

	if (__isthreaded) {
		_SPINLOCK(&malloc_init_lock);
		if (malloc_started) {
			_SPINUNLOCK(&malloc_init_lock);
			return;
		}
	}

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':	opt_utrace = 0; break;
		case 'U':	opt_utrace = 1; break;
		case 'h':	opt_madvise = 0; break;
		case 'H':	opt_madvise = 1; break;
		case 'z':	g_malloc_flags = 0; break;
		case 'Z':	g_malloc_flags = SAFLAG_ZERO; break;
		default:
			break;
		}
	}

	malloc_started = 1;

	if (__isthreaded)
		_SPINUNLOCK(&malloc_init_lock);

	UTRACE((void *) -1, 0, NULL);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
void
_nmalloc_thr_init(void)
{
	thr_mags *tp;

	/*
	 * Disallow mtmagazine operations until the mtmagazine is
	 * initialized.
	 */
	tp = &thread_mags;
	tp->init = -1;

	pthread_setspecific(thread_mags_key, tp);
	if (mtmagazine_free_live == 0) {
		mtmagazine_free_live = 1;
		pthread_once(&thread_mags_once, mtmagazine_init);
	}
	tp->init = 1;
}

/*
 * Thread locks.
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINLOCK(&slgd->Spinlock);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINUNLOCK(&slgd->Spinlock);
}

static __inline void
depot_lock(magazine_depot *dp)
{
	if (__isthreaded)
		pthread_spin_lock(&dp->lock);
}

static __inline void
depot_unlock(magazine_depot *dp)
{
	if (__isthreaded)
		pthread_spin_unlock(&dp->lock);
}

static __inline void
zone_magazine_lock(void)
{
	if (__isthreaded)
		_SPINLOCK(&zone_mag_lock);
}

static __inline void
zone_magazine_unlock(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&zone_mag_lock);
}

static __inline void
swap_mags(magazine_pair *mp)
{
	struct magazine *tmp;
	tmp = mp->loaded;
	mp->loaded = mp->prev;
	mp->prev = tmp;
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
	int hv;

	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
	      ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

	return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (__isthreaded)
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	return(bigp);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (*bigp == NULL)
		return(NULL);
	if (__isthreaded) {
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	}
	return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
	int hv;

	if (__isthreaded) {
		hv = _bigalloc_hash(ptr);
		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
	}
}
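
/*
 * Illustrative, compiled-out sketch (not part of the allocator): the
 * unmasked hash above is reduced twice, once with BIGHMASK to pick one of
 * the BIGHSIZE (1024) hash chains and once with BIGXMASK to pick one of
 * the BIGXSIZE (64) spinlocks, so several chains share each lock.  The
 * helper name is hypothetical.
 */
#if 0
static void
example_bigalloc_indices(void *ptr)
{
	int hv = _bigalloc_hash(ptr);

	fprintf(stderr, "chain %d, lock %d\n", hv & BIGHMASK, hv & BIGXMASK);
}
#endif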

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */
	if (n < 128) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8 - 1);		/* 8 byte chunks, 16 zones */
	}
	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 7);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / 32 + 15);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / 64 + 23);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / 128 + 31);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / 256 + 39);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / 512 + 47);
	}
#if ZALLOC_ZONE_LIMIT > 8192
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / 1024 + 55);
	}
#endif
#if ZALLOC_ZONE_LIMIT > 16384
	if (n < 32768) {
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / 2048 + 63);
	}
#endif
	_mpanic("Unexpected byte count %zu", n);
	return(0);
}
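
/*
 * Illustrative, compiled-out sketch (not part of the allocator): a few
 * worked zoneindex() mappings, matching the chunking table in the file
 * header.  A 100-byte request is rounded up to 104 (8-byte chunking,
 * zone 12), 513 bytes rounds to 576 (64-byte chunking, zone 32), and
 * 1024 bytes stays 1024 (128-byte chunking, zone 39).  The helper name
 * is hypothetical.
 */
#if 0
static void
example_zoneindex(void)
{
	size_t size, chunking;

	size = 100;
	MASSERT(zoneindex(&size, &chunking) == 12 &&
		size == 104 && chunking == 8);

	size = 513;
	MASSERT(zoneindex(&size, &chunking) == 32 &&
		size == 576 && chunking == 64);

	size = 1024;
	MASSERT(zoneindex(&size, &chunking) == 39 &&
		size == 1024 && chunking == 128);
}
#endif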

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
	void *ptr;

	ptr = _slaballoc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
	void *ptr;

	/* Guard against multiplication overflow before computing the total */
	if (number != 0 && SIZE_MAX / number < size) {
		errno = ENOMEM;
		return(NULL);
	}

	ptr = _slaballoc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
	void *ret;
	ret = _slabrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	bigalloc_t *bigp;
	bigalloc_t big;
	size_t chunking;
	int zi;

	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * Our zone mechanism guarantees same-sized alignment for any
	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
	 * we can just call _slaballoc() and be done.  We round size up
	 * to the nearest alignment boundary to improve our odds of
	 * it becoming a power-of-2 if it wasn't before.
	 */
	if (size <= alignment)
		size = alignment;
	else
		size = (size + alignment - 1) & ~(size_t)(alignment - 1);
	if (size < PAGE_SIZE && (size | (size - 1)) + 1 == (size << 1)) {
		*memptr = _slaballoc(size, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * Otherwise locate a zone with a chunking that matches
	 * the requested alignment, within reason.  Consider two cases:
	 *
	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
	 *     we find will be the best fit because the chunking will be
	 *     greater or equal to the alignment.
	 *
	 * (2) A 513 allocation on a 256-byte alignment.  In this case
	 *     the first zoneindex we find will be for 576 byte allocations
	 *     with a chunking of 64, which is not sufficient.  To fix this
	 *     we simply find the nearest power-of-2 >= size and use the
	 *     same side-effect of _slaballoc() which guarantees
	 *     same-alignment on a power-of-2 allocation.
	 */
	if (size < PAGE_SIZE) {
		zi = zoneindex(&size, &chunking);
		if (chunking >= alignment) {
			*memptr = _slaballoc(size, 0);
			return(*memptr ? 0 : ENOMEM);
		}
		if (size >= 1024)
			alignment = 1024;
		if (size >= 16384)
			alignment = 16384;
		while (alignment < size)
			alignment <<= 1;
		*memptr = _slaballoc(alignment, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * If the slab allocator cannot handle it use vmem_alloc().
	 *
	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (size < alignment)
		size = alignment;
	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
	*memptr = _vmem_alloc(size, alignment, 0);
	if (*memptr == NULL)
		return(ENOMEM);

	big = _slaballoc(sizeof(struct bigalloc), 0);
	if (big == NULL) {
		_vmem_free(*memptr, size);
		*memptr = NULL;
		return(ENOMEM);
	}
	bigp = bigalloc_lock(*memptr);
	big->base = *memptr;
	big->bytes = size;
	big->next = *bigp;
	*bigp = big;
	bigalloc_unlock(*memptr);

	return(0);
}
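
/*
 * Illustrative, compiled-out sketch (not part of the allocator): the
 * expression (x | (x - 1)) + 1 == (x << 1), used above and in
 * _slaballoc(), is a power-of-2 test.  For a power of two, x | (x - 1)
 * sets every bit below the single set bit, so adding 1 yields exactly
 * x << 1; for any other non-zero x the result falls short of x << 1.
 * The helper name is hypothetical.
 */
#if 0
static void
example_pow2_test(void)
{
	size_t x;

	x = 64;		/* power of two:     (64 | 63) + 1 == 128 == 64 << 1 */
	MASSERT((x | (x - 1)) + 1 == (x << 1));

	x = 96;		/* not a power of 2: (96 | 95) + 1 == 128 != 192      */
	MASSERT((x | (x - 1)) + 1 != (x << 1));
}
#endif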

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
	UTRACE(ptr, 0, 0);
	_slabfree(ptr, 0, NULL);
}

/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 *	Allocate memory via the slab allocator.  If the request is too large,
 *	or if it is page-aligned beyond a certain size, we fall back to the
 *	KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
	slzone_t z;
	slchunk_t chunk;
	slglobaldata_t slgd;
	size_t chunking;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int off;
	void *obj;

	if (!malloc_started)
		malloc_init();

	/*
	 * Handle the degenerate size == 0 case.  Yes, this does happen.
	 * Return a special pointer.  This is to maintain compatibility with
	 * the original malloc implementation.  Certain devices, such as the
	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
	 * also realloc() later on.  Joy.
	 */
	if (size == 0)
		return(ZERO_LENGTH_PTR);

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Handle large allocations directly.  There should not be very many
	 * of these so performance is not a big issue.
	 *
	 * The backend allocator is pretty nasty on a SMP system.  Use the
	 * slab allocator for one and two page-sized chunks even though we
	 * lose some efficiency.
	 */
	if (size >= ZoneLimit ||
	    ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
		bigalloc_t big;
		bigalloc_t *bigp;

		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
		chunk = _vmem_alloc(size, PAGE_SIZE, flags);
		if (chunk == NULL)
			return(NULL);

		big = _slaballoc(sizeof(struct bigalloc), 0);
		if (big == NULL) {
			_vmem_free(chunk, size);
			return(NULL);
		}
		bigp = bigalloc_lock(chunk);
		big->base = chunk;
		big->bytes = size;
		big->next = *bigp;
		*bigp = big;
		bigalloc_unlock(chunk);

		return(chunk);
	}

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);

	obj = mtmagazine_alloc(zi);
	if (obj != NULL) {
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	slgd = &SLGlobalData;
	slgd_lock(slgd);

	/*
	 * Attempt to allocate out of an existing zone.  If all zones are
	 * exhausted pull one off the free list or allocate a new one.
	 */
	if ((z = slgd->ZoneAry[zi]) == NULL) {

		z = zone_alloc(flags);
		if (z == NULL)
			goto fail;

		/*
		 * How big is the base structure?
		 */
#if defined(INVARIANTS)
		/*
		 * Make room for z_Bitmap.  An exact calculation is
		 * somewhat more complicated so don't make an exact
		 * calculation.
		 */
		off = offsetof(struct slzone,
				z_Bitmap[(ZoneSize / size + 31) / 32]);
		bzero(z->z_Bitmap, (ZoneSize / size + 31) / 8);
#else
		off = sizeof(struct slzone);
#endif

		/*
		 * Align the storage in the zone based on the chunking.
		 *
		 * Guarantee power-of-2 alignment for power-of-2-sized
		 * chunks.  Otherwise align based on the chunking size
		 * (typically 8 or 16 bytes for small allocations).
		 *
		 * NOTE: Allocations >= ZoneLimit are governed by the
		 * bigalloc code and typically only guarantee page-alignment.
		 *
		 * Set initial conditions for UIndex near the zone header
		 * to reduce unnecessary page faults, vs semi-randomization
		 * to improve L1 cache saturation.
		 */
		if ((size | (size - 1)) + 1 == (size << 1))
			off = (off + size - 1) & ~(size - 1);
		else
			off = (off + chunking - 1) & ~(chunking - 1);
		z->z_Magic = ZALLOC_SLAB_MAGIC;
		z->z_ZoneIndex = zi;
		z->z_NMax = (ZoneSize - off) / size;
		z->z_NFree = z->z_NMax;
		z->z_BasePtr = (char *)z + off;
		z->z_UIndex = z->z_UEndIndex = 0;
		z->z_ChunkSize = size;
		z->z_FirstFreePg = ZonePageCount;
		z->z_Next = slgd->ZoneAry[zi];
		slgd->ZoneAry[zi] = z;
		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
			flags &= ~SAFLAG_ZERO;	/* already zero'd */
			flags |= SAFLAG_PASSIVE;
		}

		/*
		 * Slide the base index for initial allocations out of the
		 * next zone we create so we do not over-weight the lower
		 * part of the cpu memory caches.
		 */
		slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
					& (ZALLOC_MAX_ZONE_SIZE - 1);
	}

	/*
	 * Ok, we have a zone from which at least one chunk is available.
	 *
	 * Remove us from the ZoneAry[] when we become empty
	 */
	MASSERT(z->z_NFree > 0);

	if (--z->z_NFree == 0) {
		slgd->ZoneAry[zi] = z->z_Next;
		z->z_Next = NULL;
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < ZonePageCount) {
		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
			/*
			 * Diagnostic: c_Next is not total garbage.
			 */
			MASSERT(chunk->c_Next == NULL ||
				((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
				((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
#ifdef INVARIANTS
			chunk_mark_allocated(z, chunk);
#endif
			MASSERT((uintptr_t)chunk & ZoneMask);
			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
			goto done;
		}
		++z->z_FirstFreePg;
	}

	/*
	 * No chunks are available but NFree said we had some memory,
	 * so it must be available in the never-before-used-memory
	 * area governed by UIndex.  The consequences are very
	 * serious if our zone got corrupted so we use an explicit
	 * panic rather than a KASSERT.
	 */
	chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

	if (++z->z_UIndex == z->z_NMax)
		z->z_UIndex = 0;
	if (z->z_UIndex == z->z_UEndIndex) {
		if (z->z_NFree != 0)
			_mpanic("slaballoc: corrupted zone");
	}

	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
		flags &= ~SAFLAG_ZERO;
		flags |= SAFLAG_PASSIVE;
	}
#if defined(INVARIANTS)
	chunk_mark_allocated(z, chunk);
#endif

done:
	slgd_unlock(slgd);
	if (flags & SAFLAG_ZERO) {
		bzero(chunk, size);
#ifdef INVARIANTS
	} else if ((flags & (SAFLAG_ZERO|SAFLAG_PASSIVE)) == 0) {
		if (use_malloc_pattern) {
			for (i = 0; i < size; i += sizeof(int)) {
				*(int *)((char *)chunk + i) = -1;
			}
		}
		/* avoid accidental double-free check */
		chunk->c_Next = (void *)-1;
#endif
	}
	return(chunk);
fail:
	slgd_unlock(slgd);
	return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
	bigalloc_t *bigp;
	void *nptr;
	slzone_t z;
	size_t chunking;

	if (ptr == NULL || ptr == ZERO_LENGTH_PTR)
		return(_slaballoc(size, 0));

	if (size == 0) {
		free(ptr);
		return(ZERO_LENGTH_PTR);
	}

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		bigalloc_t big;
		size_t bigbytes;

		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
				bigbytes = big->bytes;
				if (bigbytes == size) {
					bigalloc_unlock(ptr);
					return(ptr);
				}
				*bigp = big->next;
				bigalloc_unlock(ptr);
				if ((nptr = _slaballoc(size, 0)) == NULL) {
					/* Relink block */
					bigp = bigalloc_lock(ptr);
					big->next = *bigp;
					*bigp = big;
					bigalloc_unlock(ptr);
					return(NULL);
				}
				if (size > bigbytes)
					size = bigbytes;
				bcopy(ptr, nptr, size);
				_slabfree(ptr, FASTSLABREALLOC, &big);
				return(nptr);
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Get the original allocation's zone.  If the new request winds
	 * up using the same chunk size we do not have to do anything.
	 *
	 * NOTE: We don't have to lock the globaldata here, the fields we
	 * access here will not change at least as long as we have control
	 * over the allocation.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	/*
	 * Use zoneindex() to chunk-align the new size, as long as the
	 * new size is not too large.
	 */
	if (size < ZoneLimit) {
		zoneindex(&size, &chunking);
		if (z->z_ChunkSize == size)
			return(ptr);
	}

	/*
	 * Allocate memory for the new request size and copy as appropriate.
	 */
	if ((nptr = _slaballoc(size, 0)) != NULL) {
		if (size > z->z_ChunkSize)
			size = z->z_ChunkSize;
		bcopy(ptr, nptr, size);
		_slabfree(ptr, 0, NULL);
	}

	return(nptr);
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * flags:
 *	FASTSLABREALLOC		Fast call from realloc
 * MPSAFE
 */
static void
_slabfree(void *ptr, int flags, bigalloc_t *rbigp)
{
	slzone_t z;
	slchunk_t chunk;
	bigalloc_t big;
	bigalloc_t *bigp;
	slglobaldata_t slgd;
	size_t size;
	int zi;
	int pgno;

	/* Fast realloc path for big allocations */
	if (flags & FASTSLABREALLOC) {
		big = *rbigp;
		goto fastslabrealloc;
	}

	/*
	 * Handle NULL frees and special 0-byte allocations
	 */
	if (ptr == NULL)
		return;
	if (ptr == ZERO_LENGTH_PTR)
		return;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				if ((flags & FASTSLABREALLOC) == 0) {
					*bigp = big->next;
					bigalloc_unlock(ptr);
				}
fastslabrealloc:
				size = big->bytes;
				_slabfree(big, 0, NULL);
#ifdef INVARIANTS
				MASSERT(sizeof(weirdary) <= size);
				bcopy(weirdary, ptr, sizeof(weirdary));
#endif
				_vmem_free(ptr, size);
				return;
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Zone case.  Figure out the zone based on the fact that it is
	 * ZoneSize aligned.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	size = z->z_ChunkSize;
	zi = z->z_ZoneIndex;

	if (g_malloc_flags & SAFLAG_ZERO)
		bzero(ptr, size);

	if (mtmagazine_free(zi, ptr) == 0)
		return;

	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
	chunk = ptr;
	slgd = &SLGlobalData;
	slgd_lock(slgd);

#ifdef INVARIANTS
	/*
	 * Attempt to detect a double-free.  To reduce overhead we only check
	 * if there appears to be a link pointer at the base of the data.
	 */
	if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
		slchunk_t scan;

		for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
			if (scan == chunk)
				_mpanic("Double free at %p", chunk);
		}
	}
	chunk_mark_free(z, chunk);
#endif

	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
#ifdef INVARIANTS
	if (z->z_ChunkSize < sizeof(weirdary))
		bcopy(weirdary, chunk, z->z_ChunkSize);
	else
		bcopy(weirdary, chunk, sizeof(weirdary));
#endif

	/*
	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
	 * z_FirstFreePg.
	 */
	chunk->c_Next = z->z_PageAry[pgno];
	z->z_PageAry[pgno] = chunk;
	if (z->z_FirstFreePg > pgno)
		z->z_FirstFreePg = pgno;

	/*
	 * Bump the number of free chunks.  If it becomes non-zero the zone
	 * must be added back onto the appropriate list.
	 */
	if (z->z_NFree++ == 0) {
		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
		slgd->ZoneAry[z->z_ZoneIndex] = z;
	}

	/*
	 * If the zone becomes totally free then release it.
	 */
	if (z->z_NFree == z->z_NMax) {
		slzone_t *pz;

		pz = &slgd->ZoneAry[z->z_ZoneIndex];
		while (z != *pz)
			pz = &(*pz)->z_Next;
		*pz = z->z_Next;
		z->z_Magic = -1;
		z->z_Next = NULL;
		zone_free(z);
		return;
	}
	slgd_unlock(slgd);
}

#if defined(INVARIANTS)
/*
 * Helper routines for sanity checks
 */
static
void
chunk_mark_allocated(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) == 0);
	*bitptr |= 1 << bitdex;
}

static
void
chunk_mark_free(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) != 0);
	*bitptr &= ~(1 << bitdex);
}

#endif

static __inline void *
magazine_alloc(struct magazine *mp, int *burst)
{
	void *obj = NULL;

	if (mp == NULL)
		return(NULL);

	do {
		if (MAGAZINE_NOTEMPTY(mp)) {
			obj = mp->objects[--mp->rounds];
			break;
		}

		/* Return burst factor to caller */
		if ((mp->flags & M_BURST) && (burst != NULL)) {
			*burst = mp->burst_factor;
		}

		/* Reduce burst factor by NSCALE; if it hits 1, disable BURST */
		if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) &&
		    (burst != NULL)) {
			mp->burst_factor -= M_BURST_NSCALE;
			if (mp->burst_factor <= 1) {
				mp->burst_factor = 1;
				mp->flags &= ~(M_BURST);
				mp->flags &= ~(M_BURST_EARLY);
			}
		}

	} while (0);

	return obj;
}

static __inline int
magazine_free(struct magazine *mp, void *p)
{
	if (mp != NULL && MAGAZINE_NOTFULL(mp)) {
		mp->objects[mp->rounds++] = p;
		return 0;
	}

	return -1;
}
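
/*
 * Illustrative, compiled-out sketch (not part of the allocator): rounds
 * counts the objects currently cached in a magazine, bounded by capacity.
 * magazine_free() pushes an object and magazine_alloc() pops the most
 * recently freed one (LIFO), which keeps cache-warm objects in use.  The
 * helper name is hypothetical.
 */
#if 0
static void
example_magazine_rounds(void)
{
	struct magazine m = { .capacity = M_MAX_ROUNDS };
	int cookie;

	MASSERT(magazine_free(&m, &cookie) == 0);	/* rounds: 0 -> 1 */
	MASSERT(MAGAZINE_NOTEMPTY(&m));
	MASSERT(magazine_alloc(&m, NULL) == &cookie);	/* rounds: 1 -> 0 */
	MASSERT(MAGAZINE_EMPTY(&m));
}
#endif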

static void *
mtmagazine_alloc(int zi)
{
	thr_mags *tp;
	struct magazine *mp, *emptymag;
	magazine_depot *d;
	void *obj = NULL;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(NULL);

	/*
	 * Primary per-thread allocation loop
	 */
	for (;;) {
		/* If the loaded magazine has rounds, allocate and return */
		if (((mp = tp->mags[zi].loaded) != NULL) &&
		    MAGAZINE_NOTEMPTY(mp)) {
			obj = magazine_alloc(mp, NULL);
			break;
		}

		/* If the prev magazine is full, swap with loaded and retry */
		if (((mp = tp->mags[zi].prev) != NULL) &&
		    MAGAZINE_FULL(mp)) {
			swap_mags(&tp->mags[zi]);
			continue;
		}

		/*
		 * Lock the depot and check if it has any full magazines; if
		 * so we return the prev to the empty list, move loaded to
		 * prev, load a full magazine, and retry.
		 */
		d = &depots[zi];
		depot_lock(d);

		if (!SLIST_EMPTY(&d->full)) {
			emptymag = tp->mags[zi].prev;
			tp->mags[zi].prev = tp->mags[zi].loaded;
			tp->mags[zi].loaded = SLIST_FIRST(&d->full);
			SLIST_REMOVE_HEAD(&d->full, nextmagazine);

			/* Return emptymag to the depot */
			if (emptymag != NULL)
				SLIST_INSERT_HEAD(&d->empty, emptymag,
						  nextmagazine);

			depot_unlock(d);
			continue;
		} else {
			depot_unlock(d);
		}
		break;
	}

	return (obj);
}

static int
mtmagazine_free(int zi, void *ptr)
{
	thr_mags *tp;
	struct magazine *mp, *loadedmag, *newmag;
	magazine_depot *d;
	int rc = -1;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(-1);

	/*
	 * Primary per-thread freeing loop
	 */
	for (;;) {
		/* If the loaded magazine has space, free directly to it */
		if (((mp = tp->mags[zi].loaded) != NULL) &&
		    MAGAZINE_NOTFULL(mp)) {
			rc = magazine_free(mp, ptr);
			break;
		}

		/* If the prev magazine is empty, swap with loaded and retry */
		if (((mp = tp->mags[zi].prev) != NULL) &&
		    MAGAZINE_EMPTY(mp)) {
			swap_mags(&tp->mags[zi]);
			continue;
		}

		/*
		 * Lock the depot; if there are any empty magazines, move the
		 * prev to the depot's full list, move loaded to prev, move a
		 * new empty magazine to loaded, and retry.
		 */
		d = &depots[zi];
		depot_lock(d);

		if (!SLIST_EMPTY(&d->empty)) {
			loadedmag = tp->mags[zi].prev;
			tp->mags[zi].prev = tp->mags[zi].loaded;
			tp->mags[zi].loaded = SLIST_FIRST(&d->empty);
			SLIST_REMOVE_HEAD(&d->empty, nextmagazine);

			/* Return loadedmag to the depot */
			if (loadedmag != NULL)
				SLIST_INSERT_HEAD(&d->full, loadedmag,
						  nextmagazine);
			depot_unlock(d);
			continue;
		}

		/* Allocate an empty magazine, add it to the depot, retry */
		newmag = _slaballoc(sizeof(struct magazine), SAFLAG_ZERO);
		if (newmag != NULL) {
			newmag->capacity = M_MAX_ROUNDS;
			newmag->rounds = 0;

			SLIST_INSERT_HEAD(&d->empty, newmag, nextmagazine);
			depot_unlock(d);
			continue;
		} else {
			depot_unlock(d);
			rc = -1;
		}
		break;
	}

	return rc;
}

static void
mtmagazine_init(void)
{
	int error;

	error = pthread_key_create(&thread_mags_key, mtmagazine_destructor);
	if (error)
		abort();
}

/*
 * This function is only used by the thread exit destructor
 */
static void
mtmagazine_drain(struct magazine *mp)
{
	void *obj;

	while (MAGAZINE_NOTEMPTY(mp)) {
		obj = magazine_alloc(mp, NULL);
		_slabfree(obj, 0, NULL);
	}
}

/*
 * mtmagazine_destructor()
 *
 * When a thread exits, we reclaim all its resources; all its magazines are
 * drained and the structures are freed.
 *
 * WARNING!  The destructor can be called multiple times if the larger user
 *	     program has its own destructors which run after ours which
 *	     allocate or free memory.
 */
static void
mtmagazine_destructor(void *thrp)
{
	thr_mags *tp = thrp;
	struct magazine *mp;
	int i;

	/*
	 * Prevent further use of mtmagazines while we are destructing
	 * them, as well as for any destructors which are run after us
	 * prior to the thread actually being destroyed.
	 */
	tp->init = -1;

	for (i = 0; i < NZONES; i++) {
		mp = tp->mags[i].loaded;
		tp->mags[i].loaded = NULL;
		if (mp != NULL && MAGAZINE_NOTEMPTY(mp))
			mtmagazine_drain(mp);
		_slabfree(mp, 0, NULL);

		mp = tp->mags[i].prev;
		tp->mags[i].prev = NULL;
		if (mp != NULL && MAGAZINE_NOTEMPTY(mp))
			mtmagazine_drain(mp);
		_slabfree(mp, 0, NULL);
	}
}

/*
 * zone_alloc()
 *
 * Attempt to allocate a zone from the zone magazine; the zone magazine has
 * M_BURST_EARLY enabled, so honor the burst request from the magazine.
 */
static slzone_t
zone_alloc(int flags)
{
	slglobaldata_t slgd = &SLGlobalData;
	int burst = 1;
	int i, j;
	slzone_t z;

	zone_magazine_lock();
	slgd_unlock(slgd);

	z = magazine_alloc(&zone_magazine, &burst);
	if (z == NULL) {
		if (burst == 1)
			zone_magazine_unlock();

		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
		if (z == NULL) {
			/* zone magazine lock already released if burst == 1 */
			if (burst != 1)
				zone_magazine_unlock();
			slgd_lock(slgd);
			return (NULL);
		}

		for (i = 1; i < burst; i++) {
			j = magazine_free(&zone_magazine,
					  (char *) z + (ZoneSize * i));
			MASSERT(j == 0);
		}

		if (burst != 1)
			zone_magazine_unlock();
	} else {
		z->z_Flags |= SLZF_UNOTZEROD;
		zone_magazine_unlock();
	}

	slgd_lock(slgd);
	return z;
}

/*
 * zone_free()
 *
 * Releases the slgd lock prior to unmap, if unmapping is necessary
 */
static void
zone_free(void *z)
{
	slglobaldata_t slgd = &SLGlobalData;
	void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {};
	int i, j;

	zone_magazine_lock();
	slgd_unlock(slgd);

	bzero(z, sizeof(struct slzone));

	if (opt_madvise)
		madvise(z, ZoneSize, MADV_FREE);

	i = magazine_free(&zone_magazine, z);

	/*
	 * If we failed to free, collect excess magazines; release the zone
	 * magazine lock, and then free to the system via _vmem_free.
	 */
	if (i == -1) {
		j = zone_magazine.rounds - zone_magazine.low_factor;
		for (i = 0; i < j; i++) {
			excess[i] = magazine_alloc(&zone_magazine, NULL);
			MASSERT(excess[i] != NULL);
		}

		zone_magazine_unlock();

		for (i = 0; i < j; i++)
			_vmem_free(excess[i], ZoneSize);

		_vmem_free(z, ZoneSize);
	} else {
		zone_magazine_unlock();
	}
}

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
	char *addr;
	char *save;
	size_t excess;

	/*
	 * Map anonymous private memory.
	 */
	addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (addr == MAP_FAILED)
		return(NULL);

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (align - 1);

	if (excess) {
		excess = align - excess;
		save = addr;

		munmap(save + excess, size - excess);
		addr = _vmem_alloc(size, align, flags);
		munmap(save, excess);
	}
	return((void *)addr);
}
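
/*
 * Illustrative, compiled-out sketch (not part of the allocator): a worked
 * instance of the excess computation above, with hypothetical values.
 * With align = 64KB (0x10000) and an mmap() result of 0x7f0000003000, the
 * misalignment is 0x3000, so excess becomes 0x10000 - 0x3000 = 0xd000; the
 * tail of the mapping past the first 0xd000 bytes is unmapped, the
 * allocation is retried, and the leading 0xd000 bytes are then released
 * as well.
 */
#if 0
static void
example_vmem_excess(void)
{
	uintptr_t addr = 0x7f0000003000UL;
	size_t align = 64 * 1024;
	size_t excess = addr & (align - 1);	/* 0x3000 */

	if (excess)
		excess = align - excess;	/* 0xd000 */
	MASSERT(excess == 0xd000);
}
#endif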
1691 { 1692 va_list va; 1693 1694 if (malloc_panic == 0) { 1695 malloc_panic = 1; 1696 va_start(va, ctl); 1697 vfprintf(stderr, ctl, va); 1698 fprintf(stderr, "\n"); 1699 fflush(stderr); 1700 va_end(va); 1701 } 1702 abort(); 1703 } 1704