/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com> and by
 * Venkatesh Srinivas <me@endeavour.zapto.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * is nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them for free().  One and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *	+ power-of-2 sized allocations up to a page will be power-of-2
 *	  aligned.  Above that, power-of-2 sized allocations are
 *	  page-aligned.  Non power-of-2 sized allocations are aligned
 *	  the same as the chunk size for their zone.
 *	+ malloc(0) returns a special non-NULL value
 *	+ ability to allocate arbitrarily large chunks of memory
 *	+ realloc will reuse the passed pointer if possible, within the
 *	  limitations of the zone chunking.
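 *	  For example, a 100-byte request is rounded up to its zone's
 *	  112-byte chunk size (16-byte chunking), so a later realloc()
 *	  to any size up to 112 bytes returns the same pointer, while
 *	  growing it to 120 bytes forces a new allocation and copy.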
 *
 * Multithreaded enhancements for small allocations introduced August 2010.
 * These are in the spirit of 'libumem'.  See:
 *	Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
 *	slab allocator to many CPUs and arbitrary resources".  In Proc. 2001
 *	USENIX Technical Conference.  USENIX Association.
 *
 * Oversized allocations employ the BIGCACHE mechanic whereby large
 * allocations may be handed significantly larger buffers, allowing them
 * to avoid mmap/munmap operations even through significant realloc()s.
 * The excess space is only trimmed if too many large allocations have been
 * given this treatment.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.
 *
 * 'U'   / ['u']	Generate / do not generate utrace entries for ktrace(1)
 *			This will generate utrace events for all malloc,
 *			realloc, and free calls.  There are tools (mtrplay) to
 *			replay an allocation pattern or to graph heap structure
 *			(mtrgraph) which can interpret these logs.
 * 'Z'   / ['z']	Zero out / do not zero all allocations.
 *			Each new byte of memory allocated by malloc, realloc, or
 *			reallocf will be initialized to 0.  This is intended for
 *			debugging and will affect performance negatively.
 * 'H'   / ['h']	Pass a hint to the kernel about pages unused by the
 *			allocation functions.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */

#include "namespace.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <machine/atomic.h>
#include "un-namespace.h"

#include "libc_private.h"
#include "spinlock.h"

void __free(void *);
void *__malloc(size_t);
void *__calloc(size_t, size_t);
void *__realloc(void *, size_t);
void *__aligned_alloc(size_t, size_t);
int __posix_memalign(void **, size_t, size_t);

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	active;		/* bytes active */
	u_long	bytes;		/* bytes allocated */
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
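 *
 * Because zones are ZoneSize aligned, any interior chunk pointer can be
 * mapped back to this header by masking, e.g.
 *
 *	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);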
 */
struct slglobaldata;

typedef struct slzone {
	int32_t		z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone	*z_Next;	/* ZoneAry[] link if z_NFree non-zero */
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

#define FASTSLABREALLOC		0x02

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

/*
 * BIGCACHE caches oversized allocations.  Note that a linear search is
 * performed, so do not make the cache too large.
 *
 * BIGCACHE will garbage-collect excess space when the excess exceeds the
 * specified value.  A relatively large number should be used here because
 * garbage collection is expensive.
 */
#define BIGCACHE	16
#define BIGCACHE_MASK	(BIGCACHE - 1)
#define BIGCACHE_LIMIT	(1024 * 1024)		/* size limit */
#define BIGCACHE_EXCESS	(16 * 1024 * 1024)	/* garbage collect */

#define CACHE_CHUNKS	32

#define SAFLAG_ZERO	0x0001
#define SAFLAG_PASSIVE	0x0002
#define SAFLAG_MAGS	0x0004

/*
 * Thread control
 */

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

/*
 * The assertion macros try to pretty-print assertion failures
 * which can be caused by corruption.  If a lock is held, we
 * provide a macro that attempts to release it before asserting
 * in order to prevent (e.g.) a reentrant SIGABRT calling malloc
 * and deadlocking, resulting in the program freezing up.
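 *
 * For example, callers that hold the global slab lock use
 * MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd)) so the spinlock is
 * dropped before _mpanic() runs.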
 */
#define MASSERT(exp)						\
	do { if (__predict_false(!(exp)))			\
		_mpanic("assertion: %s in %s",			\
		    #exp, __func__);				\
	} while (0)

#define MASSERT_WTHUNLK(exp, unlk)				\
	do { if (__predict_false(!(exp))) {			\
		unlk;						\
		_mpanic("assertion: %s in %s",			\
		    #exp, __func__);				\
	    }							\
	} while (0)

/*
 * Magazines
 */

#define M_MAX_ROUNDS		64
#define M_ZONE_ROUNDS		64
#define M_LOW_ROUNDS		32
#define M_INIT_ROUNDS		8
#define M_BURST_FACTOR		8
#define M_BURST_NSCALE		2

#define M_BURST			0x0001
#define M_BURST_EARLY		0x0002

struct magazine {
	SLIST_ENTRY(magazine) nextmagazine;

	int		flags;
	int		capacity;	/* Max rounds in this magazine */
	int		rounds;		/* Current number of free rounds */
	int		burst_factor;	/* Number of blocks to prefill with */
	int		low_factor;	/* Free till low_factor from full mag */
	void		*objects[M_MAX_ROUNDS];
};

SLIST_HEAD(magazinelist, magazine);

static spinlock_t zone_mag_lock;
static spinlock_t depot_spinlock;
static struct magazine zone_magazine = {
	.flags = M_BURST | M_BURST_EARLY,
	.capacity = M_ZONE_ROUNDS,
	.rounds = 0,
	.burst_factor = M_BURST_FACTOR,
	.low_factor = M_LOW_ROUNDS
};

#define MAGAZINE_FULL(mp)	(mp->rounds == mp->capacity)
#define MAGAZINE_NOTFULL(mp)	(mp->rounds < mp->capacity)
#define MAGAZINE_EMPTY(mp)	(mp->rounds == 0)
#define MAGAZINE_NOTEMPTY(mp)	(mp->rounds != 0)

/*
 * Each thread will have a pair of magazines per size-class (NZONES).
 * The loaded magazine will support immediate allocations; the previous
 * magazine will either be full or empty and can be swapped as needed.
 */
typedef struct magazine_pair {
	struct magazine	*loaded;
	struct magazine	*prev;
} magazine_pair;

/* A depot is a collection of magazines for a single zone.
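 * Full magazines are parked here by mtmagazine_free() and pulled back out
 * by mtmagazine_alloc(); empty magazines travel in the opposite direction.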
 */
typedef struct magazine_depot {
	struct magazinelist full;
	struct magazinelist empty;
	spinlock_t	lock;
} magazine_depot;

typedef struct thr_mags {
	magazine_pair	mags[NZONES];
	struct magazine	*newmag;
	int		init;
} thr_mags;

static __thread thr_mags thread_mags TLS_ATTRIBUTE;
static pthread_key_t thread_mags_key;
static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT;
static magazine_depot depots[NZONES];

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static int opt_madvise = 0;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static struct slglobaldata SLGlobalData;
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static volatile void *bigcache_array[BIGCACHE];		/* atomic swap */
static volatile size_t bigcache_size_array[BIGCACHE];	/* SMP races ok */
static volatile int bigcache_index;			/* SMP races ok */
static int malloc_panic;
static size_t excess_alloc;				/* excess big allocs */

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr, int, bigalloc_t *);
static int _slabmemalign(void **memptr, size_t alignment, size_t size);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void *magazine_alloc(struct magazine *, int *);
static int magazine_free(struct magazine *, void *);
static void *mtmagazine_alloc(int zi, int flags);
static int mtmagazine_free(int zi, void *);
static void mtmagazine_init(void);
static void mtmagazine_destructor(void *);
static slzone_t zone_alloc(int flags);
static void zone_free(void *z);
static void _mpanic(const char *ctl, ...) __printflike(1, 2);
static void malloc_init(void) __constructor(101);

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}

static void
malloc_init(void)
{
	const char *p = NULL;

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':	opt_utrace = 0; break;
		case 'U':	opt_utrace = 1; break;
		case 'h':	opt_madvise = 0; break;
		case 'H':	opt_madvise = 1; break;
		case 'z':	g_malloc_flags = 0; break;
		case 'Z':	g_malloc_flags = SAFLAG_ZERO; break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
void
_nmalloc_thr_init(void)
{
	thr_mags *tp;

	/*
	 * Disallow mtmagazine operations until the mtmagazine is
	 * initialized.
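	 *
	 * (tp->init is set to -1 here and to 1 once the pthread key is
	 * installed; mtmagazine_alloc() and mtmagazine_free() bail out
	 * while it is negative.)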
	 */
	tp = &thread_mags;
	tp->init = -1;

	_pthread_once(&thread_mags_once, mtmagazine_init);
	_pthread_setspecific(thread_mags_key, tp);
	tp->init = 1;
}

void
_nmalloc_thr_prepfork(void)
{
	if (__isthreaded) {
		_SPINLOCK(&zone_mag_lock);
		_SPINLOCK(&depot_spinlock);
	}
}

void
_nmalloc_thr_parentfork(void)
{
	if (__isthreaded) {
		_SPINUNLOCK(&depot_spinlock);
		_SPINUNLOCK(&zone_mag_lock);
	}
}

void
_nmalloc_thr_childfork(void)
{
	if (__isthreaded) {
		_SPINUNLOCK(&depot_spinlock);
		_SPINUNLOCK(&zone_mag_lock);
	}
}

/*
 * Handle signal reentrancy safely whether we are threaded or not.
 * This improves the stability for mono and will probably improve
 * stability for other high-level languages which are becoming increasingly
 * sophisticated.
 *
 * The sigblockall()/sigunblockall() implementation uses a counter on
 * a per-thread shared user/kernel page, avoids system calls, and is thus
 * very fast.
 */
static __inline void
nmalloc_sigblockall(void)
{
	sigblockall();
}

static __inline void
nmalloc_sigunblockall(void)
{
	sigunblockall();
}

/*
 * Thread locks.
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINLOCK(&slgd->Spinlock);
	else
		sigblockall();
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINUNLOCK(&slgd->Spinlock);
	else
		sigunblockall();
}

static __inline void
depot_lock(magazine_depot *dp __unused)
{
	if (__isthreaded)
		_SPINLOCK(&depot_spinlock);
	else
		sigblockall();
#if 0
	if (__isthreaded)
		_SPINLOCK(&dp->lock);
#endif
}

static __inline void
depot_unlock(magazine_depot *dp __unused)
{
	if (__isthreaded)
		_SPINUNLOCK(&depot_spinlock);
	else
		sigunblockall();
#if 0
	if (__isthreaded)
		_SPINUNLOCK(&dp->lock);
#endif
}

static __inline void
zone_magazine_lock(void)
{
	if (__isthreaded)
		_SPINLOCK(&zone_mag_lock);
	else
		sigblockall();
}

static __inline void
zone_magazine_unlock(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&zone_mag_lock);
	else
		sigunblockall();
}

static __inline void
swap_mags(magazine_pair *mp)
{
	struct magazine *tmp;
	tmp = mp->loaded;
	mp->loaded = mp->prev;
	mp->prev = tmp;
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
	int hv;

	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
	      ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

	return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (__isthreaded)
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	return(bigp);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
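 *
 * (The unlocked *bigp test is safe for our callers: a big allocation is
 * linked into its hash chain before the pointer is ever returned to the
 * application, so looking up a live big pointer cannot observe an empty
 * chain.)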
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (*bigp == NULL)
		return(NULL);
	if (__isthreaded) {
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	}
	return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
	int hv;

	if (__isthreaded) {
		hv = _bigalloc_hash(ptr);
		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
	}
}

/*
 * Find a bigcache entry that might work for the allocation.  SMP races are
 * ok here except for the swap (that is, it is ok if bigcache_size_array[i]
 * is wrong or if a NULL or too-small big is returned).
 *
 * Generally speaking it is ok to find a large entry even if the bytes
 * requested are relatively small (but still oversized), because we really
 * don't know *what* the application is going to do with the buffer.
 */
static __inline
bigalloc_t
bigcache_find_alloc(size_t bytes)
{
	bigalloc_t big = NULL;
	size_t test;
	int i;

	for (i = 0; i < BIGCACHE; ++i) {
		test = bigcache_size_array[i];
		if (bytes <= test) {
			bigcache_size_array[i] = 0;
			big = atomic_swap_ptr(&bigcache_array[i], NULL);
			break;
		}
	}
	return big;
}

/*
 * Free a bigcache entry, possibly returning one that the caller really must
 * free.  This is used to cache recent oversized memory blocks.  Only
 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try
 * to collect the biggest ones we can that are under the limit.
 */
static __inline
bigalloc_t
bigcache_find_free(bigalloc_t big)
{
	int i;
	int j;
	int b;

	b = ++bigcache_index;
	for (i = 0; i < BIGCACHE; ++i) {
		j = (b + i) & BIGCACHE_MASK;
		if (bigcache_size_array[j] < big->bytes) {
			bigcache_size_array[j] = big->bytes;
			big = atomic_swap_ptr(&bigcache_array[j], big);
			break;
		}
	}
	return big;
}

static __inline
void
handle_excess_big(void)
{
	int i;
	bigalloc_t big;
	bigalloc_t *bigp;

	if (excess_alloc <= BIGCACHE_EXCESS)
		return;

	for (i = 0; i < BIGHSIZE; ++i) {
		bigp = &bigalloc_array[i];
		if (*bigp == NULL)
			continue;
		if (__isthreaded)
			_SPINLOCK(&bigspin_array[i & BIGXMASK]);
		for (big = *bigp; big; big = big->next) {
			if (big->active < big->bytes) {
				MASSERT_WTHUNLK((big->active & PAGE_MASK) == 0,
				    _SPINUNLOCK(&bigspin_array[i & BIGXMASK]));
				MASSERT_WTHUNLK((big->bytes & PAGE_MASK) == 0,
				    _SPINUNLOCK(&bigspin_array[i & BIGXMASK]));
				munmap((char *)big->base + big->active,
				       big->bytes - big->active);
				atomic_add_long(&excess_alloc,
						big->active - big->bytes);
				big->bytes = big->active;
			}
		}
		if (__isthreaded)
			_SPINUNLOCK(&bigspin_array[i & BIGXMASK]);
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */

	/*
	 * This used to be 8-byte chunks and 16 zones for n < 128.
	 * However some instructions may require 16-byte alignment
	 * (aka SIMD) and programs might not request an aligned size
	 * (aka GCC-7), so change this as follows:
	 *
	 * 0-15 bytes	8-byte alignment in two zones	(0-1)
	 * 16-127 bytes	16-byte alignment in eight zones (3-10)
	 * zone index 2 and 11-15 are currently unused.
	 */
	if (n < 16) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8 - 1);		/* 8 byte chunks, 2 zones */
		/* zones 0,1, zone 2 is unused */
	}
	if (n < 128) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 2);		/* 16 byte chunks, 8 zones */
		/* zones 3-10, zones 11-15 unused */
	}
	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 7);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / 32 + 15);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / 64 + 23);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / 128 + 31);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / 256 + 39);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / 512 + 47);
	}
#if ZALLOC_ZONE_LIMIT > 8192
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / 1024 + 55);
	}
#endif
#if ZALLOC_ZONE_LIMIT > 16384
	if (n < 32768) {
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / 2048 + 63);
	}
#endif
	_mpanic("Unexpected byte count %zu", n);
	return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
__malloc(size_t size)
{
	void *ptr;

	nmalloc_sigblockall();
	ptr = _slaballoc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	nmalloc_sigunblockall();

	return(ptr);
}

#define MUL_NO_OVERFLOW	(1UL << (sizeof(size_t) * 4))

/*
 * calloc() - call internal slab allocator
 */
void *
__calloc(size_t number, size_t size)
{
	void *ptr;

	if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
	    number > 0 && SIZE_MAX / number < size) {
		errno = ENOMEM;
		return(NULL);
	}

	nmalloc_sigblockall();
	ptr = _slaballoc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	nmalloc_sigunblockall();

	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
__realloc(void *ptr, size_t size)
{
	void *ret;

	nmalloc_sigblockall();
	ret = _slabrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	nmalloc_sigunblockall();

	return(ret);
}

/*
 * aligned_alloc()
 *
 * Allocate (size) bytes with an alignment of (alignment).
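 *
 * For example, aligned_alloc(64, 4096) goes through _slabmemalign(),
 * ends up as a page-sized power-of-2 request, and is therefore at least
 * page (and thus 64-byte) aligned.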
 */
void *
__aligned_alloc(size_t alignment, size_t size)
{
	void *ptr;
	int rc;

	nmalloc_sigblockall();
	ptr = NULL;
	rc = _slabmemalign(&ptr, alignment, size);
	if (rc)
		errno = rc;
	nmalloc_sigunblockall();

	return (ptr);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 */
int
__posix_memalign(void **memptr, size_t alignment, size_t size)
{
	int rc;

	/*
	 * OpenGroup spec issue 6 check
	 */
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	nmalloc_sigblockall();
	rc = _slabmemalign(memptr, alignment, size);
	nmalloc_sigunblockall();

	return (rc);
}

/*
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
 */
static int
_slabmemalign(void **memptr, size_t alignment, size_t size)
{
	bigalloc_t *bigp;
	bigalloc_t big;
	size_t chunking;
	int zi __unused;

	if (alignment < 1) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * Our zone mechanism guarantees same-sized alignment for any
	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
	 * we can just call _slaballoc() and be done.  We round size up
	 * to the nearest alignment boundary to improve our odds of
	 * it becoming a power-of-2 if it wasn't before.
	 */
	if (size <= alignment)
		size = alignment;
	else
		size = (size + alignment - 1) & ~(size_t)(alignment - 1);

	/*
	 * If we have overflowed above when rounding to the nearest alignment
	 * boundary, just return ENOMEM, size should be == N * sizeof(void *).
	 *
	 * Power-of-2 allocations up to 8KB will be aligned to the allocation
	 * size and _slaballoc() can simply be used.  Please see line 1082
	 * for this special case: 'Align the storage in the zone based on
	 * the chunking' has a special case for powers of 2.
	 */
	if (size == 0)
		return(ENOMEM);

	if (size <= PAGE_SIZE*2 && (size | (size - 1)) + 1 == (size << 1)) {
		*memptr = _slaballoc(size, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * Otherwise locate a zone with a chunking that matches
	 * the requested alignment, within reason.  Consider two cases:
	 *
	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
	 *     we find will be the best fit because the chunking will be
	 *     greater or equal to the alignment.
	 *
	 * (2) A 513 allocation on a 256-byte alignment.  In this case
	 *     the first zoneindex we find will be for 576 byte allocations
	 *     with a chunking of 64, which is not sufficient.  To fix this
	 *     we simply find the nearest power-of-2 >= size and use the
	 *     same side-effect of _slaballoc() which guarantees
	 *     same-alignment on a power-of-2 allocation.
	 */
	if (size < PAGE_SIZE) {
		zi = zoneindex(&size, &chunking);
		if (chunking >= alignment) {
			*memptr = _slaballoc(size, 0);
			return(*memptr ? 0 : ENOMEM);
		}
		if (size >= 1024)
			alignment = 1024;
		if (size >= 16384)
			alignment = 16384;
		while (alignment < size)
			alignment <<= 1;
		*memptr = _slaballoc(alignment, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * If the slab allocator cannot handle it use vmem_alloc().
	 *
	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (size < alignment)
		size = alignment;
	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
	if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) {
		big = bigcache_find_alloc(size);
		if (big && big->bytes < size) {
			_slabfree(big->base, FASTSLABREALLOC, &big);
			big = NULL;
		}
		if (big) {
			*memptr = big->base;
			big->active = size;
			if (big->active < big->bytes) {
				atomic_add_long(&excess_alloc,
						big->bytes - big->active);
			}
			bigp = bigalloc_lock(*memptr);
			big->next = *bigp;
			*bigp = big;
			bigalloc_unlock(*memptr);
			handle_excess_big();
			return(0);
		}
	}
	*memptr = _vmem_alloc(size, alignment, 0);
	if (*memptr == NULL)
		return(ENOMEM);

	big = _slaballoc(sizeof(struct bigalloc), 0);
	if (big == NULL) {
		_vmem_free(*memptr, size);
		*memptr = NULL;
		return(ENOMEM);
	}
	bigp = bigalloc_lock(*memptr);
	big->base = *memptr;
	big->active = size;
	big->bytes = size;		/* no excess */
	big->next = *bigp;
	*bigp = big;
	bigalloc_unlock(*memptr);

	return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
__free(void *ptr)
{
	UTRACE(ptr, 0, 0);
	nmalloc_sigblockall();
	_slabfree(ptr, 0, NULL);
	nmalloc_sigunblockall();
}

/*
 * _slaballoc()	(SLAB ALLOCATOR)
 *
 *	Allocate memory via the slab allocator.  If the request is too
 *	large, or if it is page-aligned beyond a certain size, we fall
 *	back to the KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
	slzone_t z;
	slchunk_t chunk;
	slglobaldata_t slgd;
	size_t chunking;
	thr_mags *tp;
	struct magazine *mp;
	int count;
	int zi;
	int off;
	void *obj;

	/*
	 * Handle the degenerate size == 0 case.  Yes, this does happen.
	 * Return a special pointer.  This is to maintain compatibility with
	 * the original malloc implementation.  Certain devices, such as the
	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
	 * also realloc() later on.  Joy.
	 */
	if (size == 0)
		size = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Handle large allocations directly.  There should not be very many
	 * of these so performance is not a big issue.
	 *
	 * The backend allocator is pretty nasty on a SMP system.  Use the
	 * slab allocator for one and two page-sized chunks even though we
	 * lose some efficiency.
	 *
	 * NOTE: Please see posix_memalign around line 864, which assumes
	 *	 that power-of-2 allocations of PAGE_SIZE and PAGE_SIZE*2
	 *	 can use _slaballoc() and be aligned to the same.  The
	 *	 zone cache can be used for this case, bigalloc does not
	 *	 have to be used.
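	 *
	 *	 For example, a 12KB request (a page multiple larger than
	 *	 two pages) and a 20KB request (>= ZoneLimit) both take
	 *	 the bigalloc path below, while an 8KB request still comes
	 *	 out of a zone.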
	 */
	if (size >= ZoneLimit ||
	    ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
		bigalloc_t big;
		bigalloc_t *bigp;

		/*
		 * Page-align and cache-color in case of virtually indexed
		 * physically tagged L1 caches (aka SandyBridge).  No sweat
		 * otherwise, so just do it.
		 *
		 * (don't count as excess).
		 */
		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;

		/*
		 * If we have overflowed above when rounding to the page
		 * boundary, something has passed us (size_t)[-PAGE_MASK..-1]
		 * so just return NULL, size at this point should be >= 0.
		 */
		if (size == 0)
			return (NULL);

		if ((size & (PAGE_SIZE * 2 - 1)) == 0)
			size += PAGE_SIZE;

		/*
		 * Try to reuse a cached big block to avoid mmap'ing.  If it
		 * turns out not to fit our requirements we throw it away
		 * and allocate normally.
		 */
		big = NULL;
		if (size <= BIGCACHE_LIMIT) {
			big = bigcache_find_alloc(size);
			if (big && big->bytes < size) {
				_slabfree(big->base, FASTSLABREALLOC, &big);
				big = NULL;
			}
		}
		if (big) {
			chunk = big->base;
			if (flags & SAFLAG_ZERO)
				bzero(chunk, size);
		} else {
			chunk = _vmem_alloc(size, PAGE_SIZE, flags);
			if (chunk == NULL)
				return(NULL);

			big = _slaballoc(sizeof(struct bigalloc), 0);
			if (big == NULL) {
				_vmem_free(chunk, size);
				return(NULL);
			}
			big->base = chunk;
			big->bytes = size;
		}
		big->active = size;

		bigp = bigalloc_lock(chunk);
		if (big->active < big->bytes) {
			atomic_add_long(&excess_alloc,
					big->bytes - big->active);
		}
		big->next = *bigp;
		*bigp = big;
		bigalloc_unlock(chunk);
		handle_excess_big();

		return(chunk);
	}

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);

	obj = mtmagazine_alloc(zi, flags);
	if (obj != NULL) {
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	/*
	 * Attempt to allocate out of an existing global zone.  If all zones
	 * are exhausted pull one off the free list or allocate a new one.
	 */
	slgd = &SLGlobalData;

again:
	if (slgd->ZoneAry[zi] == NULL) {
		z = zone_alloc(flags);
		if (z == NULL)
			goto fail;

		/*
		 * How big is the base structure?
		 */
		off = sizeof(struct slzone);

		/*
		 * Align the storage in the zone based on the chunking.
		 *
		 * Guarantee power-of-2 alignment for power-of-2-sized
		 * chunks.  Otherwise align based on the chunking size
		 * (typically 8 or 16 bytes for small allocations).
		 *
		 * NOTE: Allocations >= ZoneLimit are governed by the
		 * bigalloc code and typically only guarantee page-alignment.
		 *
		 * Set initial conditions for UIndex near the zone header
		 * to reduce unnecessary page faults, vs semi-randomization
		 * to improve L1 cache saturation.
		 *
		 * NOTE: Please see posix_memalign around line 864-ish, which
		 *	 assumes that power-of-2 allocations of PAGE_SIZE
		 *	 and PAGE_SIZE*2 can use _slaballoc() and be aligned
		 *	 to the same.  The zone cache can be used for this
		 *	 case, bigalloc does not have to be used.
		 *
		 *	 ALL power-of-2 requests that fall through to this
		 *	 code use this rule (conditionals above limit this
		 *	 to <= PAGE_SIZE*2).
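		 *
		 *	 For example, a 256-byte zone rounds the header
		 *	 offset up to 256 so every chunk is 256-byte
		 *	 aligned, while a 48-byte zone only rounds it up
		 *	 to the 16-byte chunking.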
		 */
		if ((size | (size - 1)) + 1 == (size << 1))
			off = roundup2(off, size);
		else
			off = roundup2(off, chunking);
		z->z_Magic = ZALLOC_SLAB_MAGIC;
		z->z_ZoneIndex = zi;
		z->z_NMax = (ZoneSize - off) / size;
		z->z_NFree = z->z_NMax;
		z->z_BasePtr = (char *)z + off;
		z->z_UIndex = z->z_UEndIndex = 0;
		z->z_ChunkSize = size;
		z->z_FirstFreePg = ZonePageCount;
		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
			flags &= ~SAFLAG_ZERO;	/* already zero'd */
			flags |= SAFLAG_PASSIVE;
		}

		/*
		 * Slide the base index for initial allocations out of the
		 * next zone we create so we do not over-weight the lower
		 * part of the cpu memory caches.
		 */
		slgd_lock(slgd);
		z->z_Next = slgd->ZoneAry[zi];
		slgd->ZoneAry[zi] = z;
		slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
				  & (ZALLOC_MAX_ZONE_SIZE - 1);
	} else {
		slgd_lock(slgd);
		z = slgd->ZoneAry[zi];
		if (z == NULL) {
			slgd_unlock(slgd);
			goto again;
		}
	}

	/*
	 * Ok, we have a zone from which at least one chunk is available.
	 */
	MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd));

	/*
	 * Try to cache <count> chunks, up to CACHE_CHUNKS (32 typ)
	 * to avoid unnecessary global lock contention.
	 */
	tp = &thread_mags;
	mp = tp->mags[zi].loaded;
	count = 0;
	if (mp && tp->init >= 0) {
		count = mp->capacity - mp->rounds;
		if (count >= z->z_NFree)
			count = z->z_NFree - 1;
		if (count > CACHE_CHUNKS)
			count = CACHE_CHUNKS;
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < ZonePageCount) {
		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
			if (((uintptr_t)chunk & ZoneMask) == 0) {
				slgd_unlock(slgd);
				_mpanic("assertion: corrupt malloc zone");
			}
			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
			--z->z_NFree;

			if (count == 0)
				goto done;
			mp->objects[mp->rounds++] = chunk;
			--count;
			continue;
		}
		++z->z_FirstFreePg;
	}

	/*
	 * No chunks are available but NFree said we had some memory,
	 * so it must be available in the never-before-used-memory
	 * area governed by UIndex.  The consequences are very
	 * serious if our zone got corrupted so we use an explicit
	 * panic rather than a KASSERT.
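	 *
	 * (z_UIndex walks this region circularly; wrapping around to
	 * z_UEndIndex while z_NFree is still non-zero is the corruption
	 * the panic below catches.)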
	 */
	for (;;) {
		chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);
		--z->z_NFree;
		if (++z->z_UIndex == z->z_NMax)
			z->z_UIndex = 0;
		if (z->z_UIndex == z->z_UEndIndex) {
			if (z->z_NFree != 0) {
				slgd_unlock(slgd);
				_mpanic("slaballoc: corrupted zone");
			}
		}
		if (count == 0)
			break;
		mp->objects[mp->rounds++] = chunk;
		--count;
	}

	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
		flags &= ~SAFLAG_ZERO;
		flags |= SAFLAG_PASSIVE;
	}

done:
	/*
	 * Remove us from the ZoneAry[] when we become empty
	 */
	if (z->z_NFree == 0) {
		slgd->ZoneAry[zi] = z->z_Next;
		z->z_Next = NULL;
	}
	slgd_unlock(slgd);
	if (flags & SAFLAG_ZERO)
		bzero(chunk, size);

	return(chunk);
fail:
	return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
	bigalloc_t *bigp;
	void *nptr;
	slzone_t z;
	size_t chunking;

	if (ptr == NULL) {
		return(_slaballoc(size, 0));
	}

	if (size == 0)
		size = 1;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		bigalloc_t big;
		size_t bigbytes;

		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
				bigbytes = big->bytes;

				/*
				 * If it already fits determine if it makes
				 * sense to shrink/reallocate.  Try to optimize
				 * programs which stupidly make incremental
				 * reallocations larger or smaller by scaling
				 * the allocation.  Also deal with potential
				 * coloring.
				 */
				if (size >= (bigbytes >> 1) &&
				    size <= bigbytes) {
					if (big->active != size) {
						atomic_add_long(&excess_alloc,
								big->active -
								size);
					}
					big->active = size;
					bigalloc_unlock(ptr);
					return(ptr);
				}

				/*
				 * For large reallocations, allocate more space
				 * than we need to try to avoid excessive
				 * reallocations later on.
				 */
				chunking = size + (size >> 3);
				chunking = (chunking + PAGE_MASK) &
					   ~(size_t)PAGE_MASK;

				/*
				 * Try to allocate adjacently in case the
				 * program is idiotically realloc()ing a
				 * huge memory block just slightly bigger.
				 * (llvm's llc tends to do this a lot).
				 *
				 * (MAP_TRYFIXED forces mmap to fail if there
				 * is already something at the address).
				 */
				if (chunking > bigbytes) {
					char *addr;
					int errno_save = errno;

					addr = mmap((char *)ptr + bigbytes,
						    chunking - bigbytes,
						    PROT_READ|PROT_WRITE,
						    MAP_PRIVATE|MAP_ANON|
						    MAP_TRYFIXED,
						    -1, 0);
					errno = errno_save;
					if (addr == (char *)ptr + bigbytes) {
						atomic_add_long(&excess_alloc,
								big->active -
								big->bytes +
								chunking -
								size);
						big->bytes = chunking;
						big->active = size;
						bigalloc_unlock(ptr);

						return(ptr);
					}
					MASSERT_WTHUNLK(
						(void *)addr == MAP_FAILED,
						bigalloc_unlock(ptr));
				}

				/*
				 * Failed, unlink big and allocate fresh.
				 * (note that we have to leave (big) intact
				 * in case the slaballoc fails).
				 */
				*bigp = big->next;
				bigalloc_unlock(ptr);
				if ((nptr = _slaballoc(size, 0)) == NULL) {
					/* Relink block */
					bigp = bigalloc_lock(ptr);
					big->next = *bigp;
					*bigp = big;
					bigalloc_unlock(ptr);
					return(NULL);
				}
				if (size > bigbytes)
					size = bigbytes;
				bcopy(ptr, nptr, size);
				atomic_add_long(&excess_alloc, big->active -
							       big->bytes);
				_slabfree(ptr, FASTSLABREALLOC, &big);

				return(nptr);
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
		handle_excess_big();
	}

	/*
	 * Get the original allocation's zone.  If the new request winds
	 * up using the same chunk size we do not have to do anything.
	 *
	 * NOTE: We don't have to lock the globaldata here, the fields we
	 * access here will not change at least as long as we have control
	 * over the allocation.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	/*
	 * Use zoneindex() to chunk-align the new size, as long as the
	 * new size is not too large.
	 */
	if (size < ZoneLimit) {
		zoneindex(&size, &chunking);
		if (z->z_ChunkSize == size) {
			return(ptr);
		}
	}

	/*
	 * Allocate memory for the new request size and copy as appropriate.
	 */
	if ((nptr = _slaballoc(size, 0)) != NULL) {
		if (size > z->z_ChunkSize)
			size = z->z_ChunkSize;
		bcopy(ptr, nptr, size);
		_slabfree(ptr, 0, NULL);
	}

	return(nptr);
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * flags:
 *	FASTSLABREALLOC		Fast call from realloc, *rbigp already
 *				unlinked.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr, int flags, bigalloc_t *rbigp)
{
	slzone_t z;
	slchunk_t chunk;
	bigalloc_t big;
	bigalloc_t *bigp;
	slglobaldata_t slgd;
	size_t size;
	int zi;
	int pgno;

	/* Fast realloc path for big allocations */
	if (flags & FASTSLABREALLOC) {
		big = *rbigp;
		goto fastslabrealloc;
	}

	/*
	 * Handle NULL frees and special 0-byte allocations
	 */
	if (ptr == NULL)
		return;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				*bigp = big->next;
				atomic_add_long(&excess_alloc, big->active -
							       big->bytes);
				bigalloc_unlock(ptr);

				/*
				 * Try to stash the block we are freeing,
				 * potentially receiving another block in
				 * return which must be freed.
				 */
fastslabrealloc:
				if (big->bytes <= BIGCACHE_LIMIT) {
					big = bigcache_find_free(big);
					if (big == NULL)
						return;
				}
				ptr = big->base;	/* reload */
				size = big->bytes;
				_slabfree(big, 0, NULL);
				_vmem_free(ptr, size);
				return;
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
		handle_excess_big();
	}

	/*
	 * Zone case.  Figure out the zone based on the fact that it is
	 * ZoneSize aligned.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	size = z->z_ChunkSize;
	zi = z->z_ZoneIndex;

	if (g_malloc_flags & SAFLAG_ZERO)
		bzero(ptr, size);

	if (mtmagazine_free(zi, ptr) == 0)
		return;

	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
	chunk = ptr;

	/*
	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
	 * z_FirstFreePg.
	 */
	slgd = &SLGlobalData;
	slgd_lock(slgd);

	chunk->c_Next = z->z_PageAry[pgno];
	z->z_PageAry[pgno] = chunk;
	if (z->z_FirstFreePg > pgno)
		z->z_FirstFreePg = pgno;

	/*
	 * Bump the number of free chunks.  If it becomes non-zero the zone
	 * must be added back onto the appropriate list.
	 */
	if (z->z_NFree++ == 0) {
		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
		slgd->ZoneAry[z->z_ZoneIndex] = z;
	}

	/*
	 * If the zone becomes totally free we get rid of it.
	 */
	if (z->z_NFree == z->z_NMax) {
		slzone_t *pz;

		pz = &slgd->ZoneAry[z->z_ZoneIndex];
		while (z != *pz)
			pz = &(*pz)->z_Next;
		*pz = z->z_Next;
		z->z_Magic = -1;
		z->z_Next = NULL;
		slgd_unlock(slgd);
		zone_free(z);
	} else {
		slgd_unlock(slgd);
	}
}

/*
 * Allocate and return a magazine.  NULL is returned and *burst is adjusted
 * if the magazine is empty.
 */
static __inline void *
magazine_alloc(struct magazine *mp, int *burst)
{
	void *obj;

	if (mp == NULL)
		return(NULL);
	if (MAGAZINE_NOTEMPTY(mp)) {
		obj = mp->objects[--mp->rounds];
		return(obj);
	}

	/*
	 * Return burst factor to caller along with NULL
	 */
	if ((mp->flags & M_BURST) && (burst != NULL)) {
		*burst = mp->burst_factor;
	}
	/* Reduce burst factor by NSCALE; if it hits 1, disable BURST */
	if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) &&
	    (burst != NULL)) {
		mp->burst_factor -= M_BURST_NSCALE;
		if (mp->burst_factor <= 1) {
			mp->burst_factor = 1;
			mp->flags &= ~(M_BURST);
			mp->flags &= ~(M_BURST_EARLY);
		}
	}
	return (NULL);
}

static __inline int
magazine_free(struct magazine *mp, void *p)
{
	if (mp != NULL && MAGAZINE_NOTFULL(mp)) {
		mp->objects[mp->rounds++] = p;
		return 0;
	}

	return -1;
}

static void *
mtmagazine_alloc(int zi, int flags)
{
	thr_mags *tp;
	struct magazine *mp, *emptymag;
	magazine_depot *d;
	void *obj;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(NULL);

	/*
	 * Primary per-thread allocation loop
	 */
	nmalloc_sigblockall();
	for (;;) {
		/*
		 * Make sure we have a magazine available for use.
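		 *
		 * (The magazine itself is allocated with SAFLAG_MAGS set,
		 * and the test below skips this block for that recursive
		 * call, so we cannot recurse indefinitely.)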
		 */
		if (tp->newmag == NULL && (flags & SAFLAG_MAGS) == 0) {
			mp = _slaballoc(sizeof(struct magazine),
					SAFLAG_ZERO | SAFLAG_MAGS);
			if (mp == NULL) {
				obj = NULL;
				break;
			}
			if (tp->newmag) {
				_slabfree(mp, 0, NULL);
			} else {
				tp->newmag = mp;
			}
		}

		/*
		 * If the loaded magazine has rounds, allocate and return
		 */
		mp = tp->mags[zi].loaded;
		obj = magazine_alloc(mp, NULL);
		if (obj)
			break;

		/*
		 * The prev magazine can only be completely empty or completely
		 * full.  If it is full, swap it with the loaded magazine
		 * and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_FULL(mp)) {
			MASSERT(mp->rounds != 0);
			swap_mags(&tp->mags[zi]);	/* prev now empty */
			continue;
		}

		/*
		 * If the depot has no loaded magazines ensure that tp->loaded
		 * is not NULL and return NULL.  This will allow _slaballoc()
		 * to cache referrals to SLGlobalData in a magazine.
		 */
		d = &depots[zi];
		if (SLIST_EMPTY(&d->full)) {	/* UNLOCKED TEST IS SAFE */
			mp = tp->mags[zi].loaded;
			if (mp == NULL && tp->newmag) {
				mp = tp->newmag;
				tp->newmag = NULL;
				mp->capacity = M_MAX_ROUNDS;
				mp->rounds = 0;
				mp->flags = 0;
				tp->mags[zi].loaded = mp;
			}
			break;
		}

		/*
		 * Cycle: depot(loaded) -> loaded -> prev -> depot(empty)
		 *
		 * If we race and the depot has no full magazines, retry.
		 */
		depot_lock(d);
		mp = SLIST_FIRST(&d->full);
		if (mp) {
			SLIST_REMOVE_HEAD(&d->full, nextmagazine);
			emptymag = tp->mags[zi].prev;
			if (emptymag) {
				SLIST_INSERT_HEAD(&d->empty, emptymag,
						  nextmagazine);
			}
			tp->mags[zi].prev = tp->mags[zi].loaded;
			tp->mags[zi].loaded = mp;
			MASSERT(MAGAZINE_NOTEMPTY(mp));
		}
		depot_unlock(d);
		continue;
	}
	nmalloc_sigunblockall();

	return (obj);
}

static int
mtmagazine_free(int zi, void *ptr)
{
	thr_mags *tp;
	struct magazine *mp, *loadedmag;
	magazine_depot *d;
	int rc = -1;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(-1);

	/*
	 * Primary per-thread freeing loop
	 */
	nmalloc_sigblockall();
	for (;;) {
		/*
		 * Make sure a new magazine is available in case we have
		 * to use it.  Staging the newmag allows us to avoid
		 * some locking/reentrancy complexity.
		 *
		 * Temporarily disable the per-thread caches for this
		 * allocation to avoid reentrancy and/or to avoid a
		 * stack overflow if the [zi] happens to be the same that
		 * would be used to allocate the new magazine.
		 */
		if (tp->newmag == NULL) {
			tp->newmag = _slaballoc(sizeof(struct magazine),
						SAFLAG_ZERO);
			if (tp->newmag == NULL) {
				rc = -1;
				break;
			}
		}

		/*
		 * If the loaded magazine has space, free directly to it
		 */
		rc = magazine_free(tp->mags[zi].loaded, ptr);
		if (rc == 0)
			break;

		/*
		 * The prev magazine can only be completely empty or completely
		 * full.  If it is empty, swap it with the loaded magazine
		 * and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_EMPTY(mp)) {
			MASSERT(mp->rounds == 0);
			swap_mags(&tp->mags[zi]);	/* prev now full */
			continue;
		}

		/*
		 * Try to get an empty magazine from the depot.  Cycle
		 * through depot(empty)->loaded->prev->depot(full).
		 * Retry if an empty magazine was available from the depot.
		 */
		d = &depots[zi];
		depot_lock(d);

		if ((loadedmag = tp->mags[zi].prev) != NULL)
			SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine);
		tp->mags[zi].prev = tp->mags[zi].loaded;
		mp = SLIST_FIRST(&d->empty);
		if (mp) {
			tp->mags[zi].loaded = mp;
			SLIST_REMOVE_HEAD(&d->empty, nextmagazine);
			depot_unlock(d);
			MASSERT(MAGAZINE_NOTFULL(mp));
		} else {
			mp = tp->newmag;
			tp->newmag = NULL;
			mp->capacity = M_MAX_ROUNDS;
			mp->rounds = 0;
			mp->flags = 0;
			tp->mags[zi].loaded = mp;
			depot_unlock(d);
		}
	}
	nmalloc_sigunblockall();

	return rc;
}

static void
mtmagazine_init(void)
{
	int error;

	error = _pthread_key_create(&thread_mags_key, mtmagazine_destructor);
	if (error)
		abort();
}

/*
 * This function is only used by the thread exit destructor
 */
static void
mtmagazine_drain(struct magazine *mp)
{
	void *obj;

	while (MAGAZINE_NOTEMPTY(mp)) {
		obj = magazine_alloc(mp, NULL);
		_slabfree(obj, 0, NULL);
	}
}

/*
 * mtmagazine_destructor()
 *
 * When a thread exits, we reclaim all its resources; all its magazines are
 * drained and the structures are freed.
 *
 * WARNING!  The destructor can be called multiple times if the larger user
 *	     program has its own destructors which run after ours which
 *	     allocate or free memory.
 */
static void
mtmagazine_destructor(void *thrp)
{
	thr_mags *tp = thrp;
	struct magazine *mp;
	int i;

	if (__isexiting)
		return;

	/*
	 * Prevent further use of mtmagazines while we are destructing
	 * them, as well as for any destructors which are run after us
	 * prior to the thread actually being destroyed.
	 */
	tp->init = -1;

	for (i = 0; i < NZONES; i++) {
		mp = tp->mags[i].loaded;
		tp->mags[i].loaded = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}

		mp = tp->mags[i].prev;
		tp->mags[i].prev = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}
	}

	if (tp->newmag) {
		mp = tp->newmag;
		tp->newmag = NULL;
		_slabfree(mp, 0, NULL);
	}
}

/*
 * zone_alloc()
 *
 * Attempt to allocate a zone from the zone magazine; the zone magazine has
 * M_BURST_EARLY enabled, so honor the burst request from the magazine.
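 *
 * For example, with the initial burst_factor of 8 an empty zone magazine
 * triggers a single 8-zone (512KB) _vmem_alloc(); the caller keeps one
 * zone and the other seven are parked in the magazine.  The factor decays
 * by M_BURST_NSCALE on each early burst until bursting is disabled.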
 */
static slzone_t
zone_alloc(int flags)
{
	int burst = 1;
	int i, j;
	slzone_t z;

	zone_magazine_lock();

	z = magazine_alloc(&zone_magazine, &burst);
	if (z == NULL && burst == 1) {
		zone_magazine_unlock();
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
	} else if (z == NULL) {
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
		if (z) {
			for (i = 1; i < burst; i++) {
				j = magazine_free(&zone_magazine,
						  (char *) z + (ZoneSize * i));
				MASSERT_WTHUNLK(j == 0,
						zone_magazine_unlock());
			}
		}
		zone_magazine_unlock();
	} else {
		z->z_Flags |= SLZF_UNOTZEROD;
		zone_magazine_unlock();
	}

	return z;
}

/*
 * Free a zone.
 */
static void
zone_free(void *z)
{
	void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {};
	int i, j;

	zone_magazine_lock();

	bzero(z, sizeof(struct slzone));

	if (opt_madvise)
		madvise(z, ZoneSize, MADV_FREE);

	i = magazine_free(&zone_magazine, z);

	/*
	 * If we failed to free, collect excess magazines; release the zone
	 * magazine lock, and then free to the system via _vmem_free.
	 * Re-enable BURST mode for the magazine.
	 */
	if (i == -1) {
		j = zone_magazine.rounds - zone_magazine.low_factor;
		for (i = 0; i < j; i++) {
			excess[i] = magazine_alloc(&zone_magazine, NULL);
			MASSERT_WTHUNLK(excess[i] != NULL,
					zone_magazine_unlock());
		}

		zone_magazine_unlock();

		for (i = 0; i < j; i++)
			_vmem_free(excess[i], ZoneSize);

		_vmem_free(z, ZoneSize);
	} else {
		zone_magazine_unlock();
	}
}

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
	static char *addr_hint;
	static int reset_hint = 16;
	char *addr;
	char *save;

	if (--reset_hint <= 0) {
		addr_hint = NULL;
		reset_hint = 16;
	}

	/*
	 * Map anonymous private memory.
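	 *
	 * (Strategy: first try the hint address, then retry once at the
	 * aligned address computed from the first miss, and finally
	 * over-allocate size + align and trim both ends.)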
	 */
	save = mmap(addr_hint, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (save == MAP_FAILED)
		goto worst_case;
	if (((uintptr_t)save & (align - 1)) == 0)
		return((void *)save);

	addr_hint = (char *)(((size_t)save + (align - 1)) & ~(align - 1));
	munmap(save, size);

	save = mmap(addr_hint, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (save == MAP_FAILED)
		goto worst_case;
	if (((size_t)save & (align - 1)) == 0)
		return((void *)save);
	munmap(save, size);

worst_case:
	save = mmap(NULL, size + align, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (save == MAP_FAILED)
		return NULL;

	addr = (char *)(((size_t)save + (align - 1)) & ~(align - 1));
	if (save != addr)
		munmap(save, addr - save);
	if (addr + size != save + size + align)
		munmap(addr + size, save + align - addr);

	addr_hint = addr + size;

	return ((void *)addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}

__weak_reference(__aligned_alloc, aligned_alloc);
__weak_reference(__malloc, malloc);
__weak_reference(__calloc, calloc);
__weak_reference(__posix_memalign, posix_memalign);
__weak_reference(__realloc, realloc);
__weak_reference(__free, free);
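
/*
 * Example usage of the standalone build from the comment near the top of
 * this file (interposing via LD_PRELOAD is an assumption about the host
 * environment; inside libc the __weak_reference() lines above provide the
 * real malloc()/free() bindings):
 *
 *	cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c
 *	env LD_PRELOAD=./nmalloc.so MALLOC_OPTIONS=Z ./myprog
 */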