1 /* 2 * NMALLOC.C - New Malloc (ported from kernel slab allocator) 3 * 4 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved. 5 * 6 * This code is derived from software contributed to The DragonFly Project 7 * by Matthew Dillon <dillon@backplane.com> and by 8 * Venkatesh Srinivas <me@endeavour.zapto.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 3. Neither the name of The DragonFly Project nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific, prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $ 38 */ 39 /* 40 * This module implements a slab allocator drop-in replacement for the 41 * libc malloc(). 42 * 43 * A slab allocator reserves a ZONE for each chunk size, then lays the 44 * chunks out in an array within the zone. Allocation and deallocation 45 * is nearly instantaneous, and overhead losses are limited to a fixed 46 * worst-case amount. 47 * 48 * The slab allocator does not have to pre-initialize the list of 49 * free chunks for each zone, and the underlying VM will not be 50 * touched at all beyond the zone header until an actual allocation 51 * needs it. 52 * 53 * Slab management and locking is done on a per-zone basis. 54 * 55 * Alloc Size Chunking Number of zones 56 * 0-127 8 16 57 * 128-255 16 8 58 * 256-511 32 8 59 * 512-1023 64 8 60 * 1024-2047 128 8 61 * 2048-4095 256 8 62 * 4096-8191 512 8 63 * 8192-16383 1024 8 64 * 16384-32767 2048 8 65 * 66 * Allocations >= ZoneLimit go directly to mmap and a hash table 67 * is used to locate for free. One and Two-page allocations use the 68 * zone mechanic to avoid excessive mmap()/munmap() calls. 69 * 70 * API FEATURES AND SIDE EFFECTS 71 * 72 * + power-of-2 sized allocations up to a page will be power-of-2 aligned. 73 * Above that power-of-2 sized allocations are page-aligned. Non 74 * power-of-2 sized allocations are aligned the same as the chunk 75 * size for their zone. 76 * + malloc(0) returns a special non-NULL value 77 * + ability to allocate arbitrarily large chunks of memory 78 * + realloc will reuse the passed pointer if possible, within the 79 * limitations of the zone chunking. 80 * 81 * Multithreaded enhancements for small allocations introduced August 2010. 82 * These are in the spirit of 'libumem'. See: 83 * Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the 84 * slab allocator to many CPUs and arbitrary resources". In Proc. 2001 85 * USENIX Technical Conference. USENIX Association. 86 * 87 * Oversized allocations employ the BIGCACHE mechanic whereby large 88 * allocations may be handed significantly larger buffers, allowing them 89 * to avoid mmap/munmap operations even through significant realloc()s. 90 * The excess space is only trimmed if too many large allocations have been 91 * given this treatment. 92 * 93 * TUNING 94 * 95 * The value of the environment variable MALLOC_OPTIONS is a character string 96 * containing various flags to tune nmalloc. 97 * 98 * 'U' / ['u'] Generate / do not generate utrace entries for ktrace(1) 99 * This will generate utrace events for all malloc, 100 * realloc, and free calls. There are tools (mtrplay) to 101 * replay and allocation pattern or to graph heap structure 102 * (mtrgraph) which can interpret these logs. 103 * 'Z' / ['z'] Zero out / do not zero all allocations. 104 * Each new byte of memory allocated by malloc, realloc, or 105 * reallocf will be initialized to 0. This is intended for 106 * debugging and will affect performance negatively. 107 * 'H' / ['h'] Pass a hint to the kernel about pages unused by the 108 * allocation functions. 109 */ 110 111 /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */ 112 113 #include "namespace.h" 114 #include <sys/param.h> 115 #include <sys/types.h> 116 #include <sys/mman.h> 117 #include <sys/queue.h> 118 #include <sys/ktrace.h> 119 #include <stdio.h> 120 #include <stdint.h> 121 #include <stdlib.h> 122 #include <stdarg.h> 123 #include <stddef.h> 124 #include <unistd.h> 125 #include <string.h> 126 #include <fcntl.h> 127 #include <errno.h> 128 #include <pthread.h> 129 #include <machine/atomic.h> 130 #include "un-namespace.h" 131 132 #include "libc_private.h" 133 #include "spinlock.h" 134 135 void __free(void *); 136 void *__malloc(size_t); 137 void *__calloc(size_t, size_t); 138 void *__realloc(void *, size_t); 139 void *__aligned_alloc(size_t, size_t); 140 int __posix_memalign(void **, size_t, size_t); 141 142 /* 143 * Linked list of large allocations 144 */ 145 typedef struct bigalloc { 146 struct bigalloc *next; /* hash link */ 147 void *base; /* base pointer */ 148 u_long active; /* bytes active */ 149 u_long bytes; /* bytes allocated */ 150 } *bigalloc_t; 151 152 /* 153 * Note that any allocations which are exact multiples of PAGE_SIZE, or 154 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem. 155 */ 156 #define MAX_SLAB_PAGEALIGN (2 * PAGE_SIZE) /* max slab for PAGE_SIZE*n */ 157 #define ZALLOC_ZONE_LIMIT (16 * 1024) /* max slab-managed alloc */ 158 #define ZALLOC_ZONE_SIZE (64 * 1024) /* zone size */ 159 #define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ 160 161 #if ZALLOC_ZONE_LIMIT == 16384 162 #define NZONES 72 163 #elif ZALLOC_ZONE_LIMIT == 32768 164 #define NZONES 80 165 #else 166 #error "I couldn't figure out NZONES" 167 #endif 168 169 /* 170 * Chunk structure for free elements 171 */ 172 typedef struct slchunk { 173 struct slchunk *c_Next; 174 } *slchunk_t; 175 176 /* 177 * The IN-BAND zone header is placed at the beginning of each zone. 178 */ 179 struct slglobaldata; 180 181 typedef struct slzone { 182 int32_t z_Magic; /* magic number for sanity check */ 183 int z_NFree; /* total free chunks / ualloc space */ 184 struct slzone *z_Next; /* ZoneAry[] link if z_NFree non-zero */ 185 int z_NMax; /* maximum free chunks */ 186 char *z_BasePtr; /* pointer to start of chunk array */ 187 int z_UIndex; /* current initial allocation index */ 188 int z_UEndIndex; /* last (first) allocation index */ 189 int z_ChunkSize; /* chunk size for validation */ 190 int z_FirstFreePg; /* chunk list on a page-by-page basis */ 191 int z_ZoneIndex; 192 int z_Flags; 193 struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE]; 194 } *slzone_t; 195 196 typedef struct slglobaldata { 197 spinlock_t Spinlock; 198 slzone_t ZoneAry[NZONES];/* linked list of zones NFree > 0 */ 199 } *slglobaldata_t; 200 201 #define SLZF_UNOTZEROD 0x0001 202 203 #define FASTSLABREALLOC 0x02 204 205 /* 206 * Misc constants. Note that allocations that are exact multiples of 207 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 208 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 209 */ 210 #define MIN_CHUNK_SIZE 8 /* in bytes */ 211 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 212 #define IN_SAME_PAGE_MASK (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK) 213 214 /* 215 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 216 * not be larger then 64. 217 */ 218 #define BIGHSHIFT 10 /* bigalloc hash table */ 219 #define BIGHSIZE (1 << BIGHSHIFT) 220 #define BIGHMASK (BIGHSIZE - 1) 221 #define BIGXSIZE (BIGHSIZE / 16) /* bigalloc lock table */ 222 #define BIGXMASK (BIGXSIZE - 1) 223 224 /* 225 * BIGCACHE caches oversized allocations. Note that a linear search is 226 * performed, so do not make the cache too large. 227 * 228 * BIGCACHE will garbage-collect excess space when the excess exceeds the 229 * specified value. A relatively large number should be used here because 230 * garbage collection is expensive. 231 */ 232 #define BIGCACHE 16 233 #define BIGCACHE_MASK (BIGCACHE - 1) 234 #define BIGCACHE_LIMIT (1024 * 1024) /* size limit */ 235 #define BIGCACHE_EXCESS (16 * 1024 * 1024) /* garbage collect */ 236 237 #define CACHE_CHUNKS 32 238 239 #define SAFLAG_ZERO 0x0001 240 #define SAFLAG_PASSIVE 0x0002 241 #define SAFLAG_MAGS 0x0004 242 243 /* 244 * Thread control 245 */ 246 247 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 248 249 /* 250 * The assertion macros try to pretty-print assertion failures 251 * which can be caused by corruption. If a lock is held, we 252 * provide a macro that attempts to release it before asserting 253 * in order to prevent (e.g.) a reentrant SIGABRT calling malloc 254 * and deadlocking, resulting in the program freezing up. 255 */ 256 #define MASSERT(exp) \ 257 do { if (__predict_false(!(exp))) \ 258 _mpanic("assertion: %s in %s", \ 259 #exp, __func__); \ 260 } while (0) 261 262 #define MASSERT_WTHUNLK(exp, unlk) \ 263 do { if (__predict_false(!(exp))) { \ 264 unlk; \ 265 _mpanic("assertion: %s in %s", \ 266 #exp, __func__); \ 267 } \ 268 } while (0) 269 270 /* 271 * Magazines, arrange so the structure is roughly 4KB. 272 */ 273 #define M_MAX_ROUNDS (512 - 3) 274 #define M_MIN_ROUNDS 16 275 #define M_ZONE_INIT_ROUNDS 64 276 #define M_ZONE_HYSTERESIS 32 277 278 struct magazine { 279 SLIST_ENTRY(magazine) nextmagazine; 280 281 int flags; 282 int capacity; /* Max rounds in this magazine */ 283 int rounds; /* Current number of free rounds */ 284 int unused01; 285 void *objects[M_MAX_ROUNDS]; 286 }; 287 288 SLIST_HEAD(magazinelist, magazine); 289 290 static spinlock_t zone_mag_lock; 291 static spinlock_t depot_spinlock; 292 static struct magazine zone_magazine = { 293 .flags = 0, 294 .capacity = M_ZONE_INIT_ROUNDS, 295 .rounds = 0, 296 }; 297 298 #define MAGAZINE_FULL(mp) (mp->rounds == mp->capacity) 299 #define MAGAZINE_NOTFULL(mp) (mp->rounds < mp->capacity) 300 #define MAGAZINE_EMPTY(mp) (mp->rounds == 0) 301 #define MAGAZINE_NOTEMPTY(mp) (mp->rounds != 0) 302 303 /* 304 * Each thread will have a pair of magazines per size-class (NZONES) 305 * The loaded magazine will support immediate allocations, the previous 306 * magazine will either be full or empty and can be swapped at need 307 */ 308 typedef struct magazine_pair { 309 struct magazine *loaded; 310 struct magazine *prev; 311 } magazine_pair; 312 313 /* A depot is a collection of magazines for a single zone. */ 314 typedef struct magazine_depot { 315 struct magazinelist full; 316 struct magazinelist empty; 317 spinlock_t lock; 318 } magazine_depot; 319 320 typedef struct thr_mags { 321 magazine_pair mags[NZONES]; 322 struct magazine *newmag; 323 int init; 324 } thr_mags; 325 326 static __thread thr_mags thread_mags TLS_ATTRIBUTE; 327 static pthread_key_t thread_mags_key; 328 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT; 329 static magazine_depot depots[NZONES]; 330 331 /* 332 * Fixed globals (not per-cpu) 333 */ 334 static const int ZoneSize = ZALLOC_ZONE_SIZE; 335 static const int ZoneLimit = ZALLOC_ZONE_LIMIT; 336 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE; 337 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1; 338 339 static int opt_madvise = 0; 340 static int opt_utrace = 0; 341 static int g_malloc_flags = 0; 342 static struct slglobaldata SLGlobalData; 343 static bigalloc_t bigalloc_array[BIGHSIZE]; 344 static spinlock_t bigspin_array[BIGXSIZE]; 345 static volatile void *bigcache_array[BIGCACHE]; /* atomic swap */ 346 static volatile size_t bigcache_size_array[BIGCACHE]; /* SMP races ok */ 347 static volatile int bigcache_index; /* SMP races ok */ 348 static int malloc_panic; 349 static size_t excess_alloc; /* excess big allocs */ 350 351 static void *_slaballoc(size_t size, int flags); 352 static void *_slabrealloc(void *ptr, size_t size); 353 static void _slabfree(void *ptr, int, bigalloc_t *); 354 static int _slabmemalign(void **memptr, size_t alignment, size_t size); 355 static void *_vmem_alloc(size_t bytes, size_t align, int flags); 356 static void _vmem_free(void *ptr, size_t bytes); 357 static void *magazine_alloc(struct magazine *); 358 static int magazine_free(struct magazine *, void *); 359 static void *mtmagazine_alloc(int zi, int flags); 360 static int mtmagazine_free(int zi, void *); 361 static void mtmagazine_init(void); 362 static void mtmagazine_destructor(void *); 363 static slzone_t zone_alloc(int flags); 364 static void zone_free(void *z); 365 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 366 static void malloc_init(void) __constructor(101); 367 368 struct nmalloc_utrace { 369 void *p; 370 size_t s; 371 void *r; 372 }; 373 374 #define UTRACE(a, b, c) \ 375 if (opt_utrace) { \ 376 struct nmalloc_utrace ut = { \ 377 .p = (a), \ 378 .s = (b), \ 379 .r = (c) \ 380 }; \ 381 utrace(&ut, sizeof(ut)); \ 382 } 383 384 static void 385 malloc_init(void) 386 { 387 const char *p = NULL; 388 389 if (issetugid() == 0) 390 p = getenv("MALLOC_OPTIONS"); 391 392 for (; p != NULL && *p != '\0'; p++) { 393 switch(*p) { 394 case 'u': opt_utrace = 0; break; 395 case 'U': opt_utrace = 1; break; 396 case 'h': opt_madvise = 0; break; 397 case 'H': opt_madvise = 1; break; 398 case 'z': g_malloc_flags = 0; break; 399 case 'Z': g_malloc_flags = SAFLAG_ZERO; break; 400 default: 401 break; 402 } 403 } 404 405 UTRACE((void *) -1, 0, NULL); 406 } 407 408 /* 409 * We have to install a handler for nmalloc thread teardowns when 410 * the thread is created. We cannot delay this because destructors in 411 * sophisticated userland programs can call malloc() for the first time 412 * during their thread exit. 413 * 414 * This routine is called directly from pthreads. 415 */ 416 void 417 _nmalloc_thr_init(void) 418 { 419 thr_mags *tp; 420 421 /* 422 * Disallow mtmagazine operations until the mtmagazine is 423 * initialized. 424 */ 425 tp = &thread_mags; 426 tp->init = -1; 427 428 _pthread_once(&thread_mags_once, mtmagazine_init); 429 _pthread_setspecific(thread_mags_key, tp); 430 tp->init = 1; 431 } 432 433 void 434 _nmalloc_thr_prepfork(void) 435 { 436 if (__isthreaded) { 437 _SPINLOCK(&zone_mag_lock); 438 _SPINLOCK(&depot_spinlock); 439 } 440 } 441 442 void 443 _nmalloc_thr_parentfork(void) 444 { 445 if (__isthreaded) { 446 _SPINUNLOCK(&depot_spinlock); 447 _SPINUNLOCK(&zone_mag_lock); 448 } 449 } 450 451 void 452 _nmalloc_thr_childfork(void) 453 { 454 if (__isthreaded) { 455 _SPINUNLOCK(&depot_spinlock); 456 _SPINUNLOCK(&zone_mag_lock); 457 } 458 } 459 460 /* 461 * Handle signal reentrancy safely whether we are threaded or not. 462 * This improves the stability for mono and will probably improve 463 * stability for other high-level languages which are becoming increasingly 464 * sophisticated. 465 * 466 * The sigblockall()/sigunblockall() implementation uses a counter on 467 * a per-thread shared user/kernel page, avoids system calls, and is thus 468 * very fast. 469 */ 470 static __inline void 471 nmalloc_sigblockall(void) 472 { 473 sigblockall(); 474 } 475 476 static __inline void 477 nmalloc_sigunblockall(void) 478 { 479 sigunblockall(); 480 } 481 482 /* 483 * Thread locks. 484 */ 485 static __inline void 486 slgd_lock(slglobaldata_t slgd) 487 { 488 if (__isthreaded) 489 _SPINLOCK(&slgd->Spinlock); 490 else 491 sigblockall(); 492 } 493 494 static __inline void 495 slgd_unlock(slglobaldata_t slgd) 496 { 497 if (__isthreaded) 498 _SPINUNLOCK(&slgd->Spinlock); 499 else 500 sigunblockall(); 501 } 502 503 static __inline void 504 depot_lock(magazine_depot *dp __unused) 505 { 506 if (__isthreaded) 507 _SPINLOCK(&depot_spinlock); 508 else 509 sigblockall(); 510 #if 0 511 if (__isthreaded) 512 _SPINLOCK(&dp->lock); 513 #endif 514 } 515 516 static __inline void 517 depot_unlock(magazine_depot *dp __unused) 518 { 519 if (__isthreaded) 520 _SPINUNLOCK(&depot_spinlock); 521 else 522 sigunblockall(); 523 #if 0 524 if (__isthreaded) 525 _SPINUNLOCK(&dp->lock); 526 #endif 527 } 528 529 static __inline void 530 zone_magazine_lock(void) 531 { 532 if (__isthreaded) 533 _SPINLOCK(&zone_mag_lock); 534 else 535 sigblockall(); 536 } 537 538 static __inline void 539 zone_magazine_unlock(void) 540 { 541 if (__isthreaded) 542 _SPINUNLOCK(&zone_mag_lock); 543 else 544 sigunblockall(); 545 } 546 547 static __inline void 548 swap_mags(magazine_pair *mp) 549 { 550 struct magazine *tmp; 551 tmp = mp->loaded; 552 mp->loaded = mp->prev; 553 mp->prev = tmp; 554 } 555 556 /* 557 * bigalloc hashing and locking support. 558 * 559 * Return an unmasked hash code for the passed pointer. 560 */ 561 static __inline int 562 _bigalloc_hash(void *ptr) 563 { 564 int hv; 565 566 hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^ 567 ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT)); 568 569 return(hv); 570 } 571 572 /* 573 * Lock the hash chain and return a pointer to its base for the specified 574 * address. 575 */ 576 static __inline bigalloc_t * 577 bigalloc_lock(void *ptr) 578 { 579 int hv = _bigalloc_hash(ptr); 580 bigalloc_t *bigp; 581 582 bigp = &bigalloc_array[hv & BIGHMASK]; 583 if (__isthreaded) 584 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 585 return(bigp); 586 } 587 588 /* 589 * Lock the hash chain and return a pointer to its base for the specified 590 * address. 591 * 592 * BUT, if the hash chain is empty, just return NULL and do not bother 593 * to lock anything. 594 */ 595 static __inline bigalloc_t * 596 bigalloc_check_and_lock(void *ptr) 597 { 598 int hv = _bigalloc_hash(ptr); 599 bigalloc_t *bigp; 600 601 bigp = &bigalloc_array[hv & BIGHMASK]; 602 if (*bigp == NULL) 603 return(NULL); 604 if (__isthreaded) { 605 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 606 } 607 return(bigp); 608 } 609 610 static __inline void 611 bigalloc_unlock(void *ptr) 612 { 613 int hv; 614 615 if (__isthreaded) { 616 hv = _bigalloc_hash(ptr); 617 _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]); 618 } 619 } 620 621 /* 622 * Find a bigcache entry that might work for the allocation. SMP races are 623 * ok here except for the swap (that is, it is ok if bigcache_size_array[i] 624 * is wrong or if a NULL or too-small big is returned). 625 * 626 * Generally speaking it is ok to find a large entry even if the bytes 627 * requested are relatively small (but still oversized), because we really 628 * don't know *what* the application is going to do with the buffer. 629 */ 630 static __inline 631 bigalloc_t 632 bigcache_find_alloc(size_t bytes) 633 { 634 bigalloc_t big = NULL; 635 size_t test; 636 int i; 637 638 for (i = 0; i < BIGCACHE; ++i) { 639 test = bigcache_size_array[i]; 640 if (bytes <= test) { 641 bigcache_size_array[i] = 0; 642 big = atomic_swap_ptr(&bigcache_array[i], NULL); 643 break; 644 } 645 } 646 return big; 647 } 648 649 /* 650 * Free a bigcache entry, possibly returning one that the caller really must 651 * free. This is used to cache recent oversized memory blocks. Only 652 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try 653 * to collect the biggest ones we can that are under the limit. 654 */ 655 static __inline 656 bigalloc_t 657 bigcache_find_free(bigalloc_t big) 658 { 659 int i; 660 int j; 661 int b; 662 663 b = ++bigcache_index; 664 for (i = 0; i < BIGCACHE; ++i) { 665 j = (b + i) & BIGCACHE_MASK; 666 if (bigcache_size_array[j] < big->bytes) { 667 bigcache_size_array[j] = big->bytes; 668 big = atomic_swap_ptr(&bigcache_array[j], big); 669 break; 670 } 671 } 672 return big; 673 } 674 675 static __inline 676 void 677 handle_excess_big(void) 678 { 679 int i; 680 bigalloc_t big; 681 bigalloc_t *bigp; 682 683 if (excess_alloc <= BIGCACHE_EXCESS) 684 return; 685 686 for (i = 0; i < BIGHSIZE; ++i) { 687 bigp = &bigalloc_array[i]; 688 if (*bigp == NULL) 689 continue; 690 if (__isthreaded) 691 _SPINLOCK(&bigspin_array[i & BIGXMASK]); 692 for (big = *bigp; big; big = big->next) { 693 if (big->active < big->bytes) { 694 MASSERT_WTHUNLK((big->active & PAGE_MASK) == 0, 695 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 696 MASSERT_WTHUNLK((big->bytes & PAGE_MASK) == 0, 697 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 698 munmap((char *)big->base + big->active, 699 big->bytes - big->active); 700 atomic_add_long(&excess_alloc, 701 big->active - big->bytes); 702 big->bytes = big->active; 703 } 704 } 705 if (__isthreaded) 706 _SPINUNLOCK(&bigspin_array[i & BIGXMASK]); 707 } 708 } 709 710 /* 711 * Calculate the zone index for the allocation request size and set the 712 * allocation request size to that particular zone's chunk size. 713 */ 714 static __inline int 715 zoneindex(size_t *bytes, size_t *chunking) 716 { 717 size_t n = (unsigned int)*bytes; /* unsigned for shift opt */ 718 719 /* 720 * This used to be 8-byte chunks and 16 zones for n < 128. 721 * However some instructions may require 16-byte alignment 722 * (aka SIMD) and programs might not request an aligned size 723 * (aka GCC-7), so change this as follows: 724 * 725 * 0-15 bytes 8-byte alignment in two zones (0-1) 726 * 16-127 bytes 16-byte alignment in four zones (3-10) 727 * zone index 2 and 11-15 are currently unused. 728 */ 729 if (n < 16) { 730 *bytes = n = (n + 7) & ~7; 731 *chunking = 8; 732 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 733 /* zones 0,1, zone 2 is unused */ 734 } 735 if (n < 128) { 736 *bytes = n = (n + 15) & ~15; 737 *chunking = 16; 738 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 739 /* zones 3-10, zones 11-15 unused */ 740 } 741 if (n < 256) { 742 *bytes = n = (n + 15) & ~15; 743 *chunking = 16; 744 return(n / 16 + 7); 745 } 746 if (n < 8192) { 747 if (n < 512) { 748 *bytes = n = (n + 31) & ~31; 749 *chunking = 32; 750 return(n / 32 + 15); 751 } 752 if (n < 1024) { 753 *bytes = n = (n + 63) & ~63; 754 *chunking = 64; 755 return(n / 64 + 23); 756 } 757 if (n < 2048) { 758 *bytes = n = (n + 127) & ~127; 759 *chunking = 128; 760 return(n / 128 + 31); 761 } 762 if (n < 4096) { 763 *bytes = n = (n + 255) & ~255; 764 *chunking = 256; 765 return(n / 256 + 39); 766 } 767 *bytes = n = (n + 511) & ~511; 768 *chunking = 512; 769 return(n / 512 + 47); 770 } 771 #if ZALLOC_ZONE_LIMIT > 8192 772 if (n < 16384) { 773 *bytes = n = (n + 1023) & ~1023; 774 *chunking = 1024; 775 return(n / 1024 + 55); 776 } 777 #endif 778 #if ZALLOC_ZONE_LIMIT > 16384 779 if (n < 32768) { 780 *bytes = n = (n + 2047) & ~2047; 781 *chunking = 2048; 782 return(n / 2048 + 63); 783 } 784 #endif 785 _mpanic("Unexpected byte count %zu", n); 786 return(0); 787 } 788 789 /* 790 * We want large magazines for small allocations 791 */ 792 static __inline int 793 zonecapacity(int zi) 794 { 795 int cap; 796 797 cap = (NZONES - zi) * (M_MAX_ROUNDS - M_MIN_ROUNDS) / NZONES + 798 M_MIN_ROUNDS; 799 800 return cap; 801 } 802 803 /* 804 * malloc() - call internal slab allocator 805 */ 806 void * 807 __malloc(size_t size) 808 { 809 void *ptr; 810 811 nmalloc_sigblockall(); 812 ptr = _slaballoc(size, 0); 813 if (ptr == NULL) 814 errno = ENOMEM; 815 else 816 UTRACE(0, size, ptr); 817 nmalloc_sigunblockall(); 818 819 return(ptr); 820 } 821 822 #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) 823 824 /* 825 * calloc() - call internal slab allocator 826 */ 827 void * 828 __calloc(size_t number, size_t size) 829 { 830 void *ptr; 831 832 if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && 833 number > 0 && SIZE_MAX / number < size) { 834 errno = ENOMEM; 835 return(NULL); 836 } 837 838 nmalloc_sigblockall(); 839 ptr = _slaballoc(number * size, SAFLAG_ZERO); 840 if (ptr == NULL) 841 errno = ENOMEM; 842 else 843 UTRACE(0, number * size, ptr); 844 nmalloc_sigunblockall(); 845 846 return(ptr); 847 } 848 849 /* 850 * realloc() (SLAB ALLOCATOR) 851 * 852 * We do not attempt to optimize this routine beyond reusing the same 853 * pointer if the new size fits within the chunking of the old pointer's 854 * zone. 855 */ 856 void * 857 __realloc(void *ptr, size_t size) 858 { 859 void *ret; 860 861 nmalloc_sigblockall(); 862 ret = _slabrealloc(ptr, size); 863 if (ret == NULL) 864 errno = ENOMEM; 865 else 866 UTRACE(ptr, size, ret); 867 nmalloc_sigunblockall(); 868 869 return(ret); 870 } 871 872 /* 873 * aligned_alloc() 874 * 875 * Allocate (size) bytes with a alignment of (alignment). 876 */ 877 void * 878 __aligned_alloc(size_t alignment, size_t size) 879 { 880 void *ptr; 881 int rc; 882 883 nmalloc_sigblockall(); 884 ptr = NULL; 885 rc = _slabmemalign(&ptr, alignment, size); 886 if (rc) 887 errno = rc; 888 nmalloc_sigunblockall(); 889 890 return (ptr); 891 } 892 893 /* 894 * posix_memalign() 895 * 896 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 897 * is a power of 2 >= sizeof(void *). 898 */ 899 int 900 __posix_memalign(void **memptr, size_t alignment, size_t size) 901 { 902 int rc; 903 904 /* 905 * OpenGroup spec issue 6 check 906 */ 907 if (alignment < sizeof(void *)) { 908 *memptr = NULL; 909 return(EINVAL); 910 } 911 912 nmalloc_sigblockall(); 913 rc = _slabmemalign(memptr, alignment, size); 914 nmalloc_sigunblockall(); 915 916 return (rc); 917 } 918 919 /* 920 * The slab allocator will allocate on power-of-2 boundaries up to 921 * at least PAGE_SIZE. We use the zoneindex mechanic to find a 922 * zone matching the requirements, and _vmem_alloc() otherwise. 923 */ 924 static int 925 _slabmemalign(void **memptr, size_t alignment, size_t size) 926 { 927 bigalloc_t *bigp; 928 bigalloc_t big; 929 size_t chunking; 930 int zi __unused; 931 932 if (alignment < 1) { 933 *memptr = NULL; 934 return(EINVAL); 935 } 936 937 /* 938 * OpenGroup spec issue 6 checks 939 */ 940 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 941 *memptr = NULL; 942 return(EINVAL); 943 } 944 945 /* 946 * Our zone mechanism guarantees same-sized alignment for any 947 * power-of-2 allocation. If size is a power-of-2 and reasonable 948 * we can just call _slaballoc() and be done. We round size up 949 * to the nearest alignment boundary to improve our odds of 950 * it becoming a power-of-2 if it wasn't before. 951 */ 952 if (size <= alignment) 953 size = alignment; 954 else 955 size = (size + alignment - 1) & ~(size_t)(alignment - 1); 956 957 /* 958 * If we have overflowed above when rounding to the nearest alignment 959 * boundary, just return ENOMEM, size should be == N * sizeof(void *). 960 * 961 * Power-of-2 allocations up to 8KB will be aligned to the allocation 962 * size and _slaballoc() can simply be used. Please see line 1082 963 * for this special case: 'Align the storage in the zone based on 964 * the chunking' has a special case for powers of 2. 965 */ 966 if (size == 0) 967 return(ENOMEM); 968 969 if (size <= MAX_SLAB_PAGEALIGN && 970 (size | (size - 1)) + 1 == (size << 1)) { 971 *memptr = _slaballoc(size, 0); 972 return(*memptr ? 0 : ENOMEM); 973 } 974 975 /* 976 * Otherwise locate a zone with a chunking that matches 977 * the requested alignment, within reason. Consider two cases: 978 * 979 * (1) A 1K allocation on a 32-byte alignment. The first zoneindex 980 * we find will be the best fit because the chunking will be 981 * greater or equal to the alignment. 982 * 983 * (2) A 513 allocation on a 256-byte alignment. In this case 984 * the first zoneindex we find will be for 576 byte allocations 985 * with a chunking of 64, which is not sufficient. To fix this 986 * we simply find the nearest power-of-2 >= size and use the 987 * same side-effect of _slaballoc() which guarantees 988 * same-alignment on a power-of-2 allocation. 989 */ 990 if (size < PAGE_SIZE) { 991 zi = zoneindex(&size, &chunking); 992 if (chunking >= alignment) { 993 *memptr = _slaballoc(size, 0); 994 return(*memptr ? 0 : ENOMEM); 995 } 996 if (size >= 1024) 997 alignment = 1024; 998 if (size >= 16384) 999 alignment = 16384; 1000 while (alignment < size) 1001 alignment <<= 1; 1002 *memptr = _slaballoc(alignment, 0); 1003 return(*memptr ? 0 : ENOMEM); 1004 } 1005 1006 /* 1007 * If the slab allocator cannot handle it use vmem_alloc(). 1008 * 1009 * Alignment must be adjusted up to at least PAGE_SIZE in this case. 1010 */ 1011 if (alignment < PAGE_SIZE) 1012 alignment = PAGE_SIZE; 1013 if (size < alignment) 1014 size = alignment; 1015 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1016 if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) { 1017 big = bigcache_find_alloc(size); 1018 if (big && big->bytes < size) { 1019 _slabfree(big->base, FASTSLABREALLOC, &big); 1020 big = NULL; 1021 } 1022 if (big) { 1023 *memptr = big->base; 1024 big->active = size; 1025 if (big->active < big->bytes) { 1026 atomic_add_long(&excess_alloc, 1027 big->bytes - big->active); 1028 } 1029 bigp = bigalloc_lock(*memptr); 1030 big->next = *bigp; 1031 *bigp = big; 1032 bigalloc_unlock(*memptr); 1033 handle_excess_big(); 1034 return(0); 1035 } 1036 } 1037 *memptr = _vmem_alloc(size, alignment, 0); 1038 if (*memptr == NULL) 1039 return(ENOMEM); 1040 1041 big = _slaballoc(sizeof(struct bigalloc), 0); 1042 if (big == NULL) { 1043 _vmem_free(*memptr, size); 1044 *memptr = NULL; 1045 return(ENOMEM); 1046 } 1047 bigp = bigalloc_lock(*memptr); 1048 big->base = *memptr; 1049 big->active = size; 1050 big->bytes = size; /* no excess */ 1051 big->next = *bigp; 1052 *bigp = big; 1053 bigalloc_unlock(*memptr); 1054 1055 return(0); 1056 } 1057 1058 /* 1059 * free() (SLAB ALLOCATOR) - do the obvious 1060 */ 1061 void 1062 __free(void *ptr) 1063 { 1064 UTRACE(ptr, 0, 0); 1065 nmalloc_sigblockall(); 1066 _slabfree(ptr, 0, NULL); 1067 nmalloc_sigunblockall(); 1068 } 1069 1070 /* 1071 * _slaballoc() (SLAB ALLOCATOR) 1072 * 1073 * Allocate memory via the slab allocator. If the request is too large, 1074 * or if it page-aligned beyond a certain size, we fall back to the 1075 * KMEM subsystem 1076 */ 1077 static void * 1078 _slaballoc(size_t size, int flags) 1079 { 1080 slzone_t z; 1081 slchunk_t chunk; 1082 slglobaldata_t slgd; 1083 size_t chunking; 1084 thr_mags *tp; 1085 struct magazine *mp; 1086 int count; 1087 int zi; 1088 int off; 1089 void *obj; 1090 1091 /* 1092 * Handle the degenerate size == 0 case. Yes, this does happen. 1093 * Return a special pointer. This is to maintain compatibility with 1094 * the original malloc implementation. Certain devices, such as the 1095 * adaptec driver, not only allocate 0 bytes, they check for NULL and 1096 * also realloc() later on. Joy. 1097 */ 1098 if (size == 0) 1099 size = 1; 1100 1101 /* Capture global flags */ 1102 flags |= g_malloc_flags; 1103 1104 /* 1105 * Handle large allocations directly, with a separate bigmem cache. 1106 * 1107 * The backend allocator is pretty nasty on a SMP system. Use the 1108 * slab allocator for one and two page-sized chunks even though we 1109 * lose some efficiency. 1110 * 1111 * NOTE: Please see _slabmemalign(), which assumes that power-of-2 1112 * allocations up to an including MAX_SLAB_PAGEALIGN 1113 * can use _slaballoc() and be aligned to the same. The 1114 * zone cache can be used for this case, bigalloc does not 1115 * have to be used. 1116 */ 1117 if (size >= ZoneLimit || 1118 ((size & PAGE_MASK) == 0 && size > MAX_SLAB_PAGEALIGN)) { 1119 bigalloc_t big; 1120 bigalloc_t *bigp; 1121 1122 /* 1123 * Page-align and cache-color in case of virtually indexed 1124 * physically tagged L1 caches (aka SandyBridge). No sweat 1125 * otherwise, so just do it. 1126 * 1127 * (don't count as excess). 1128 */ 1129 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1130 1131 /* 1132 * If we have overflowed above when rounding to the page 1133 * boundary, something has passed us (size_t)[-PAGE_MASK..-1] 1134 * so just return NULL, size at this point should be >= 0. 1135 */ 1136 if (size == 0) 1137 return (NULL); 1138 1139 /* 1140 * Force an additional page offset for 8KB-aligned requests 1141 * (i.e. 8KB, 16KB, etc) that helps spread data across the 1142 * CPU caches at the cost of some dead space in the memory 1143 * map. 1144 */ 1145 if ((size & (PAGE_SIZE * 2 - 1)) == 0) 1146 size += PAGE_SIZE; 1147 1148 /* 1149 * Try to reuse a cached big block to avoid mmap'ing. If it 1150 * turns out not to fit our requirements we throw it away 1151 * and allocate normally. 1152 */ 1153 big = NULL; 1154 if (size <= BIGCACHE_LIMIT) { 1155 big = bigcache_find_alloc(size); 1156 if (big && big->bytes < size) { 1157 _slabfree(big->base, FASTSLABREALLOC, &big); 1158 big = NULL; 1159 } 1160 } 1161 if (big) { 1162 chunk = big->base; 1163 if (flags & SAFLAG_ZERO) 1164 bzero(chunk, size); 1165 } else { 1166 chunk = _vmem_alloc(size, PAGE_SIZE, flags); 1167 if (chunk == NULL) 1168 return(NULL); 1169 1170 big = _slaballoc(sizeof(struct bigalloc), 0); 1171 if (big == NULL) { 1172 _vmem_free(chunk, size); 1173 return(NULL); 1174 } 1175 big->base = chunk; 1176 big->bytes = size; 1177 } 1178 big->active = size; 1179 1180 bigp = bigalloc_lock(chunk); 1181 if (big->active < big->bytes) { 1182 atomic_add_long(&excess_alloc, 1183 big->bytes - big->active); 1184 } 1185 big->next = *bigp; 1186 *bigp = big; 1187 bigalloc_unlock(chunk); 1188 handle_excess_big(); 1189 1190 return(chunk); 1191 } 1192 1193 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 1194 zi = zoneindex(&size, &chunking); 1195 MASSERT(zi < NZONES); 1196 1197 obj = mtmagazine_alloc(zi, flags); 1198 if (obj != NULL) { 1199 if (flags & SAFLAG_ZERO) 1200 bzero(obj, size); 1201 return (obj); 1202 } 1203 1204 /* 1205 * Attempt to allocate out of an existing global zone. If all zones 1206 * are exhausted pull one off the free list or allocate a new one. 1207 */ 1208 slgd = &SLGlobalData; 1209 1210 again: 1211 if (slgd->ZoneAry[zi] == NULL) { 1212 z = zone_alloc(flags); 1213 if (z == NULL) 1214 goto fail; 1215 1216 /* 1217 * How big is the base structure? 1218 */ 1219 off = sizeof(struct slzone); 1220 1221 /* 1222 * Align the storage in the zone based on the chunking. 1223 * 1224 * Guarantee power-of-2 alignment for power-of-2-sized 1225 * chunks. Otherwise align based on the chunking size 1226 * (typically 8 or 16 bytes for small allocations). 1227 * 1228 * NOTE: Allocations >= ZoneLimit are governed by the 1229 * bigalloc code and typically only guarantee page-alignment. 1230 * 1231 * Set initial conditions for UIndex near the zone header 1232 * to reduce unecessary page faults, vs semi-randomization 1233 * to improve L1 cache saturation. 1234 * 1235 * NOTE: Please see _slabmemalign(), which assumes that 1236 * power-of-2 allocations up to an including 1237 * MAX_SLAB_PAGEALIGN can use _slaballoc() 1238 * and be aligned to the same. The zone cache can be 1239 * used for this case, bigalloc does not have to be 1240 * used. 1241 * 1242 * ALL power-of-2 requests that fall through to this 1243 * code use this rule (conditionals above limit this 1244 * to <= MAX_SLAB_PAGEALIGN). 1245 */ 1246 if ((size | (size - 1)) + 1 == (size << 1)) 1247 off = roundup2(off, size); 1248 else 1249 off = roundup2(off, chunking); 1250 z->z_Magic = ZALLOC_SLAB_MAGIC; 1251 z->z_ZoneIndex = zi; 1252 z->z_NMax = (ZoneSize - off) / size; 1253 z->z_NFree = z->z_NMax; 1254 z->z_BasePtr = (char *)z + off; 1255 z->z_UIndex = z->z_UEndIndex = 0; 1256 z->z_ChunkSize = size; 1257 z->z_FirstFreePg = ZonePageCount; 1258 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1259 flags &= ~SAFLAG_ZERO; /* already zero'd */ 1260 flags |= SAFLAG_PASSIVE; 1261 } 1262 1263 /* 1264 * Slide the base index for initial allocations out of the 1265 * next zone we create so we do not over-weight the lower 1266 * part of the cpu memory caches. 1267 */ 1268 slgd_lock(slgd); 1269 z->z_Next = slgd->ZoneAry[zi]; 1270 slgd->ZoneAry[zi] = z; 1271 } else { 1272 slgd_lock(slgd); 1273 z = slgd->ZoneAry[zi]; 1274 if (z == NULL) { 1275 slgd_unlock(slgd); 1276 goto again; 1277 } 1278 } 1279 1280 /* 1281 * Ok, we have a zone from which at least one chunk is available. 1282 */ 1283 MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd)); 1284 1285 /* 1286 * Try to cache <count> chunks, up to CACHE_CHUNKS (32 typ) 1287 * to avoid unnecessary global lock contention. 1288 */ 1289 tp = &thread_mags; 1290 mp = tp->mags[zi].loaded; 1291 count = 0; 1292 if (mp && tp->init >= 0) { 1293 count = mp->capacity - mp->rounds; 1294 if (count >= z->z_NFree) 1295 count = z->z_NFree - 1; 1296 if (count > CACHE_CHUNKS) 1297 count = CACHE_CHUNKS; 1298 } 1299 1300 /* 1301 * Locate a chunk in a free page. This attempts to localize 1302 * reallocations into earlier pages without us having to sort 1303 * the chunk list. A chunk may still overlap a page boundary. 1304 */ 1305 while (z->z_FirstFreePg < ZonePageCount) { 1306 if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { 1307 if (((uintptr_t)chunk & ZoneMask) == 0) { 1308 slgd_unlock(slgd); 1309 _mpanic("assertion: corrupt malloc zone"); 1310 } 1311 z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; 1312 --z->z_NFree; 1313 1314 if (count == 0) 1315 goto done; 1316 mp->objects[mp->rounds++] = chunk; 1317 --count; 1318 continue; 1319 } 1320 ++z->z_FirstFreePg; 1321 } 1322 1323 /* 1324 * No chunks are available but NFree said we had some memory, 1325 * so it must be available in the never-before-used-memory 1326 * area governed by UIndex. The consequences are very 1327 * serious if our zone got corrupted so we use an explicit 1328 * panic rather then a KASSERT. 1329 */ 1330 for (;;) { 1331 chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size); 1332 --z->z_NFree; 1333 if (++z->z_UIndex == z->z_NMax) 1334 z->z_UIndex = 0; 1335 if (z->z_UIndex == z->z_UEndIndex) { 1336 if (z->z_NFree != 0) { 1337 slgd_unlock(slgd); 1338 _mpanic("slaballoc: corrupted zone"); 1339 } 1340 } 1341 if (count == 0) 1342 break; 1343 mp->objects[mp->rounds++] = chunk; 1344 --count; 1345 } 1346 1347 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1348 flags &= ~SAFLAG_ZERO; 1349 flags |= SAFLAG_PASSIVE; 1350 } 1351 1352 done: 1353 /* 1354 * Remove us from the ZoneAry[] when we become empty 1355 */ 1356 if (z->z_NFree == 0) { 1357 slgd->ZoneAry[zi] = z->z_Next; 1358 z->z_Next = NULL; 1359 } 1360 slgd_unlock(slgd); 1361 if (flags & SAFLAG_ZERO) 1362 bzero(chunk, size); 1363 1364 return(chunk); 1365 fail: 1366 return(NULL); 1367 } 1368 1369 /* 1370 * Reallocate memory within the chunk 1371 */ 1372 static void * 1373 _slabrealloc(void *ptr, size_t size) 1374 { 1375 bigalloc_t *bigp; 1376 void *nptr; 1377 slzone_t z; 1378 size_t chunking; 1379 1380 if (ptr == NULL) { 1381 return(_slaballoc(size, 0)); 1382 } 1383 1384 if (size == 0) 1385 size = 1; 1386 1387 /* 1388 * Handle oversized allocations. 1389 */ 1390 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1391 bigalloc_t big; 1392 size_t bigbytes; 1393 1394 while ((big = *bigp) != NULL) { 1395 if (big->base == ptr) { 1396 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1397 bigbytes = big->bytes; 1398 1399 /* 1400 * If it already fits determine if it makes 1401 * sense to shrink/reallocate. Try to optimize 1402 * programs which stupidly make incremental 1403 * reallocations larger or smaller by scaling 1404 * the allocation. Also deal with potential 1405 * coloring. 1406 */ 1407 if (size >= (bigbytes >> 1) && 1408 size <= bigbytes) { 1409 if (big->active != size) { 1410 atomic_add_long(&excess_alloc, 1411 big->active - 1412 size); 1413 } 1414 big->active = size; 1415 bigalloc_unlock(ptr); 1416 return(ptr); 1417 } 1418 1419 /* 1420 * For large reallocations, allocate more space 1421 * than we need to try to avoid excessive 1422 * reallocations later on. 1423 */ 1424 chunking = size + (size >> 3); 1425 chunking = (chunking + PAGE_MASK) & 1426 ~(size_t)PAGE_MASK; 1427 1428 /* 1429 * Try to allocate adjacently in case the 1430 * program is idiotically realloc()ing a 1431 * huge memory block just slightly bigger. 1432 * (llvm's llc tends to do this a lot). 1433 * 1434 * (MAP_TRYFIXED forces mmap to fail if there 1435 * is already something at the address). 1436 */ 1437 if (chunking > bigbytes) { 1438 char *addr; 1439 int errno_save = errno; 1440 1441 addr = mmap((char *)ptr + bigbytes, 1442 chunking - bigbytes, 1443 PROT_READ|PROT_WRITE, 1444 MAP_PRIVATE|MAP_ANON| 1445 MAP_TRYFIXED, 1446 -1, 0); 1447 errno = errno_save; 1448 if (addr == (char *)ptr + bigbytes) { 1449 atomic_add_long(&excess_alloc, 1450 big->active - 1451 big->bytes + 1452 chunking - 1453 size); 1454 big->bytes = chunking; 1455 big->active = size; 1456 bigalloc_unlock(ptr); 1457 1458 return(ptr); 1459 } 1460 MASSERT_WTHUNLK( 1461 (void *)addr == MAP_FAILED, 1462 bigalloc_unlock(ptr)); 1463 } 1464 1465 /* 1466 * Failed, unlink big and allocate fresh. 1467 * (note that we have to leave (big) intact 1468 * in case the slaballoc fails). 1469 */ 1470 *bigp = big->next; 1471 bigalloc_unlock(ptr); 1472 if ((nptr = _slaballoc(size, 0)) == NULL) { 1473 /* Relink block */ 1474 bigp = bigalloc_lock(ptr); 1475 big->next = *bigp; 1476 *bigp = big; 1477 bigalloc_unlock(ptr); 1478 return(NULL); 1479 } 1480 if (size > bigbytes) 1481 size = bigbytes; 1482 bcopy(ptr, nptr, size); 1483 atomic_add_long(&excess_alloc, big->active - 1484 big->bytes); 1485 _slabfree(ptr, FASTSLABREALLOC, &big); 1486 1487 return(nptr); 1488 } 1489 bigp = &big->next; 1490 } 1491 bigalloc_unlock(ptr); 1492 handle_excess_big(); 1493 } 1494 1495 /* 1496 * Get the original allocation's zone. If the new request winds 1497 * up using the same chunk size we do not have to do anything. 1498 * 1499 * NOTE: We don't have to lock the globaldata here, the fields we 1500 * access here will not change at least as long as we have control 1501 * over the allocation. 1502 */ 1503 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1504 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1505 1506 /* 1507 * Use zoneindex() to chunk-align the new size, as long as the 1508 * new size is not too large. 1509 */ 1510 if (size < ZoneLimit) { 1511 zoneindex(&size, &chunking); 1512 if (z->z_ChunkSize == size) { 1513 return(ptr); 1514 } 1515 } 1516 1517 /* 1518 * Allocate memory for the new request size and copy as appropriate. 1519 */ 1520 if ((nptr = _slaballoc(size, 0)) != NULL) { 1521 if (size > z->z_ChunkSize) 1522 size = z->z_ChunkSize; 1523 bcopy(ptr, nptr, size); 1524 _slabfree(ptr, 0, NULL); 1525 } 1526 1527 return(nptr); 1528 } 1529 1530 /* 1531 * free (SLAB ALLOCATOR) 1532 * 1533 * Free a memory block previously allocated by malloc. Note that we do not 1534 * attempt to uplodate ks_loosememuse as MP races could prevent us from 1535 * checking memory limits in malloc. 1536 * 1537 * flags: 1538 * FASTSLABREALLOC Fast call from realloc, *rbigp already 1539 * unlinked. 1540 * 1541 * MPSAFE 1542 */ 1543 static void 1544 _slabfree(void *ptr, int flags, bigalloc_t *rbigp) 1545 { 1546 slzone_t z; 1547 slchunk_t chunk; 1548 bigalloc_t big; 1549 bigalloc_t *bigp; 1550 slglobaldata_t slgd; 1551 size_t size; 1552 int zi; 1553 int pgno; 1554 1555 /* Fast realloc path for big allocations */ 1556 if (flags & FASTSLABREALLOC) { 1557 big = *rbigp; 1558 goto fastslabrealloc; 1559 } 1560 1561 /* 1562 * Handle NULL frees and special 0-byte allocations 1563 */ 1564 if (ptr == NULL) 1565 return; 1566 1567 /* 1568 * Handle oversized allocations. 1569 */ 1570 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1571 while ((big = *bigp) != NULL) { 1572 if (big->base == ptr) { 1573 *bigp = big->next; 1574 atomic_add_long(&excess_alloc, big->active - 1575 big->bytes); 1576 bigalloc_unlock(ptr); 1577 1578 /* 1579 * Try to stash the block we are freeing, 1580 * potentially receiving another block in 1581 * return which must be freed. 1582 */ 1583 fastslabrealloc: 1584 if (big->bytes <= BIGCACHE_LIMIT) { 1585 big = bigcache_find_free(big); 1586 if (big == NULL) 1587 return; 1588 } 1589 ptr = big->base; /* reload */ 1590 size = big->bytes; 1591 _slabfree(big, 0, NULL); 1592 _vmem_free(ptr, size); 1593 return; 1594 } 1595 bigp = &big->next; 1596 } 1597 bigalloc_unlock(ptr); 1598 handle_excess_big(); 1599 } 1600 1601 /* 1602 * Zone case. Figure out the zone based on the fact that it is 1603 * ZoneSize aligned. 1604 */ 1605 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1606 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1607 1608 size = z->z_ChunkSize; 1609 zi = z->z_ZoneIndex; 1610 1611 if (g_malloc_flags & SAFLAG_ZERO) 1612 bzero(ptr, size); 1613 1614 if (mtmagazine_free(zi, ptr) == 0) 1615 return; 1616 1617 pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; 1618 chunk = ptr; 1619 1620 /* 1621 * Add this free non-zero'd chunk to a linked list for reuse, adjust 1622 * z_FirstFreePg. 1623 */ 1624 slgd = &SLGlobalData; 1625 slgd_lock(slgd); 1626 1627 chunk->c_Next = z->z_PageAry[pgno]; 1628 z->z_PageAry[pgno] = chunk; 1629 if (z->z_FirstFreePg > pgno) 1630 z->z_FirstFreePg = pgno; 1631 1632 /* 1633 * Bump the number of free chunks. If it becomes non-zero the zone 1634 * must be added back onto the appropriate list. 1635 */ 1636 if (z->z_NFree++ == 0) { 1637 z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; 1638 slgd->ZoneAry[z->z_ZoneIndex] = z; 1639 } 1640 1641 /* 1642 * If the zone becomes totally free we get rid of it. 1643 */ 1644 if (z->z_NFree == z->z_NMax) { 1645 slzone_t *pz; 1646 1647 pz = &slgd->ZoneAry[z->z_ZoneIndex]; 1648 while (z != *pz) 1649 pz = &(*pz)->z_Next; 1650 *pz = z->z_Next; 1651 z->z_Magic = -1; 1652 z->z_Next = NULL; 1653 slgd_unlock(slgd); 1654 zone_free(z); 1655 } else { 1656 slgd_unlock(slgd); 1657 } 1658 } 1659 1660 /* 1661 * Allocate and return a magazine. Return NULL if no magazines are 1662 * available. 1663 */ 1664 static __inline void * 1665 magazine_alloc(struct magazine *mp) 1666 { 1667 void *obj; 1668 1669 if (mp && MAGAZINE_NOTEMPTY(mp)) { 1670 obj = mp->objects[--mp->rounds]; 1671 } else { 1672 obj = NULL; 1673 } 1674 return (obj); 1675 } 1676 1677 static __inline int 1678 magazine_free(struct magazine *mp, void *p) 1679 { 1680 if (mp != NULL && MAGAZINE_NOTFULL(mp)) { 1681 mp->objects[mp->rounds++] = p; 1682 return 0; 1683 } 1684 1685 return -1; 1686 } 1687 1688 static void * 1689 mtmagazine_alloc(int zi, int flags) 1690 { 1691 thr_mags *tp; 1692 struct magazine *mp, *emptymag; 1693 magazine_depot *d; 1694 void *obj; 1695 1696 /* 1697 * Do not try to access per-thread magazines while the mtmagazine 1698 * is being initialized or destroyed. 1699 */ 1700 tp = &thread_mags; 1701 if (tp->init < 0) 1702 return(NULL); 1703 1704 /* 1705 * Primary per-thread allocation loop 1706 */ 1707 nmalloc_sigblockall(); 1708 for (;;) { 1709 /* 1710 * Make sure we have a magazine available for use. 1711 */ 1712 if (tp->newmag == NULL && (flags & SAFLAG_MAGS) == 0) { 1713 mp = _slaballoc(sizeof(struct magazine), 1714 SAFLAG_ZERO | SAFLAG_MAGS); 1715 if (mp == NULL) { 1716 obj = NULL; 1717 break; 1718 } 1719 if (tp->newmag) { 1720 _slabfree(mp, 0, NULL); 1721 } else { 1722 tp->newmag = mp; 1723 } 1724 } 1725 1726 /* 1727 * If the loaded magazine has rounds, allocate and return 1728 */ 1729 mp = tp->mags[zi].loaded; 1730 obj = magazine_alloc(mp); 1731 if (obj) 1732 break; 1733 1734 /* 1735 * The prev magazine can only be completely empty or completely 1736 * full. If it is full, swap it with the loaded magazine 1737 * and retry. 1738 */ 1739 mp = tp->mags[zi].prev; 1740 if (mp && MAGAZINE_FULL(mp)) { 1741 MASSERT(mp->rounds != 0); 1742 swap_mags(&tp->mags[zi]); /* prev now empty */ 1743 continue; 1744 } 1745 1746 /* 1747 * If the depot has no loaded magazines ensure that tp->loaded 1748 * is not NULL and return NULL. This will allow _slaballoc() 1749 * to cache referals to SLGlobalData in a magazine. 1750 */ 1751 d = &depots[zi]; 1752 if (SLIST_EMPTY(&d->full)) { /* UNLOCKED TEST IS SAFE */ 1753 mp = tp->mags[zi].loaded; 1754 if (mp == NULL && tp->newmag) { 1755 mp = tp->newmag; 1756 tp->newmag = NULL; 1757 mp->capacity = zonecapacity(zi); 1758 mp->rounds = 0; 1759 mp->flags = 0; 1760 tp->mags[zi].loaded = mp; 1761 } 1762 break; 1763 } 1764 1765 /* 1766 * Cycle: depot(loaded) -> loaded -> prev -> depot(empty) 1767 * 1768 * If we race and the depot has no full magazines, retry. 1769 */ 1770 depot_lock(d); 1771 mp = SLIST_FIRST(&d->full); 1772 if (mp) { 1773 SLIST_REMOVE_HEAD(&d->full, nextmagazine); 1774 emptymag = tp->mags[zi].prev; 1775 if (emptymag) { 1776 SLIST_INSERT_HEAD(&d->empty, emptymag, 1777 nextmagazine); 1778 } 1779 tp->mags[zi].prev = tp->mags[zi].loaded; 1780 tp->mags[zi].loaded = mp; 1781 MASSERT(MAGAZINE_NOTEMPTY(mp)); 1782 } 1783 depot_unlock(d); 1784 continue; 1785 } 1786 nmalloc_sigunblockall(); 1787 1788 return (obj); 1789 } 1790 1791 static int 1792 mtmagazine_free(int zi, void *ptr) 1793 { 1794 thr_mags *tp; 1795 struct magazine *mp, *loadedmag; 1796 magazine_depot *d; 1797 int rc = -1; 1798 1799 /* 1800 * Do not try to access per-thread magazines while the mtmagazine 1801 * is being initialized or destroyed. 1802 */ 1803 tp = &thread_mags; 1804 if (tp->init < 0) 1805 return(-1); 1806 1807 /* 1808 * Primary per-thread freeing loop 1809 */ 1810 nmalloc_sigblockall(); 1811 for (;;) { 1812 /* 1813 * Make sure a new magazine is available in case we have 1814 * to use it. Staging the newmag allows us to avoid 1815 * some locking/reentrancy complexity. 1816 * 1817 * Temporarily disable the per-thread caches for this 1818 * allocation to avoid reentrancy and/or to avoid a 1819 * stack overflow if the [zi] happens to be the same that 1820 * would be used to allocate the new magazine. 1821 */ 1822 if (tp->newmag == NULL) { 1823 tp->newmag = _slaballoc(sizeof(struct magazine), 1824 SAFLAG_ZERO); 1825 if (tp->newmag == NULL) { 1826 rc = -1; 1827 break; 1828 } 1829 } 1830 1831 /* 1832 * If the loaded magazine has space, free directly to it 1833 */ 1834 rc = magazine_free(tp->mags[zi].loaded, ptr); 1835 if (rc == 0) 1836 break; 1837 1838 /* 1839 * The prev magazine can only be completely empty or completely 1840 * full. If it is empty, swap it with the loaded magazine 1841 * and retry. 1842 */ 1843 mp = tp->mags[zi].prev; 1844 if (mp && MAGAZINE_EMPTY(mp)) { 1845 MASSERT(mp->rounds == 0); 1846 swap_mags(&tp->mags[zi]); /* prev now full */ 1847 continue; 1848 } 1849 1850 /* 1851 * Try to get an empty magazine from the depot. Cycle 1852 * through depot(empty)->loaded->prev->depot(full). 1853 * Retry if an empty magazine was available from the depot. 1854 */ 1855 d = &depots[zi]; 1856 depot_lock(d); 1857 1858 if ((loadedmag = tp->mags[zi].prev) != NULL) 1859 SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine); 1860 tp->mags[zi].prev = tp->mags[zi].loaded; 1861 mp = SLIST_FIRST(&d->empty); 1862 if (mp) { 1863 tp->mags[zi].loaded = mp; 1864 SLIST_REMOVE_HEAD(&d->empty, nextmagazine); 1865 depot_unlock(d); 1866 MASSERT(MAGAZINE_NOTFULL(mp)); 1867 } else { 1868 mp = tp->newmag; 1869 tp->newmag = NULL; 1870 mp->capacity = zonecapacity(zi); 1871 mp->rounds = 0; 1872 mp->flags = 0; 1873 tp->mags[zi].loaded = mp; 1874 depot_unlock(d); 1875 } 1876 } 1877 nmalloc_sigunblockall(); 1878 1879 return rc; 1880 } 1881 1882 static void 1883 mtmagazine_init(void) 1884 { 1885 int error; 1886 1887 error = _pthread_key_create(&thread_mags_key, mtmagazine_destructor); 1888 if (error) 1889 abort(); 1890 } 1891 1892 /* 1893 * This function is only used by the thread exit destructor 1894 */ 1895 static void 1896 mtmagazine_drain(struct magazine *mp) 1897 { 1898 void *obj; 1899 1900 while (MAGAZINE_NOTEMPTY(mp)) { 1901 obj = magazine_alloc(mp); 1902 _slabfree(obj, 0, NULL); 1903 } 1904 } 1905 1906 /* 1907 * mtmagazine_destructor() 1908 * 1909 * When a thread exits, we reclaim all its resources; all its magazines are 1910 * drained and the structures are freed. 1911 * 1912 * WARNING! The destructor can be called multiple times if the larger user 1913 * program has its own destructors which run after ours which 1914 * allocate or free memory. 1915 */ 1916 static void 1917 mtmagazine_destructor(void *thrp) 1918 { 1919 thr_mags *tp = thrp; 1920 struct magazine *mp; 1921 int i; 1922 1923 if (__isexiting) 1924 return; 1925 1926 /* 1927 * Prevent further use of mtmagazines while we are destructing 1928 * them, as well as for any destructors which are run after us 1929 * prior to the thread actually being destroyed. 1930 */ 1931 tp->init = -1; 1932 1933 for (i = 0; i < NZONES; i++) { 1934 mp = tp->mags[i].loaded; 1935 tp->mags[i].loaded = NULL; 1936 if (mp) { 1937 if (MAGAZINE_NOTEMPTY(mp)) 1938 mtmagazine_drain(mp); 1939 _slabfree(mp, 0, NULL); 1940 } 1941 1942 mp = tp->mags[i].prev; 1943 tp->mags[i].prev = NULL; 1944 if (mp) { 1945 if (MAGAZINE_NOTEMPTY(mp)) 1946 mtmagazine_drain(mp); 1947 _slabfree(mp, 0, NULL); 1948 } 1949 } 1950 1951 if (tp->newmag) { 1952 mp = tp->newmag; 1953 tp->newmag = NULL; 1954 _slabfree(mp, 0, NULL); 1955 } 1956 } 1957 1958 /* 1959 * zone_alloc() 1960 * 1961 * Attempt to allocate a zone from the zone magazine. 1962 */ 1963 static slzone_t 1964 zone_alloc(int flags) 1965 { 1966 slzone_t z; 1967 1968 zone_magazine_lock(); 1969 1970 z = magazine_alloc(&zone_magazine); 1971 if (z == NULL) { 1972 zone_magazine_unlock(); 1973 z = _vmem_alloc(ZoneSize, ZoneSize, flags); 1974 } else { 1975 z->z_Flags |= SLZF_UNOTZEROD; 1976 zone_magazine_unlock(); 1977 } 1978 return z; 1979 } 1980 1981 /* 1982 * Free a zone. 1983 */ 1984 static void 1985 zone_free(void *z) 1986 { 1987 void *excess[M_ZONE_HYSTERESIS]; 1988 int i; 1989 1990 zone_magazine_lock(); 1991 1992 bzero(z, sizeof(struct slzone)); 1993 1994 if (opt_madvise) 1995 madvise(z, ZoneSize, MADV_FREE); 1996 1997 i = magazine_free(&zone_magazine, z); 1998 1999 /* 2000 * If we failed to free, collect excess magazines; release the zone 2001 * magazine lock, and then free to the system via _vmem_free. Re-enable 2002 * BURST mode for the magazine. 2003 */ 2004 if (i == -1) { 2005 for (i = 0; i < M_ZONE_HYSTERESIS; ++i) { 2006 excess[i] = magazine_alloc(&zone_magazine); 2007 MASSERT_WTHUNLK(excess[i] != NULL, 2008 zone_magazine_unlock()); 2009 } 2010 zone_magazine_unlock(); 2011 2012 for (i = 0; i < M_ZONE_HYSTERESIS; ++i) 2013 _vmem_free(excess[i], ZoneSize); 2014 _vmem_free(z, ZoneSize); 2015 } else { 2016 zone_magazine_unlock(); 2017 } 2018 } 2019 2020 /* 2021 * _vmem_alloc() 2022 * 2023 * Directly map memory in PAGE_SIZE'd chunks with the specified 2024 * alignment. 2025 * 2026 * Alignment must be a multiple of PAGE_SIZE. 2027 * 2028 * Size must be >= alignment. 2029 */ 2030 static void * 2031 _vmem_alloc(size_t size, size_t align, int flags) 2032 { 2033 static char *addr_hint; 2034 static int reset_hint = 16; 2035 char *addr; 2036 char *save; 2037 2038 if (--reset_hint <= 0) { 2039 addr_hint = NULL; 2040 reset_hint = 16; 2041 } 2042 2043 /* 2044 * Map anonymous private memory. 2045 */ 2046 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2047 MAP_PRIVATE|MAP_ANON, -1, 0); 2048 if (save == MAP_FAILED) 2049 goto worst_case; 2050 if (((uintptr_t)save & (align - 1)) == 0) 2051 return((void *)save); 2052 2053 addr_hint = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2054 munmap(save, size); 2055 2056 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2057 MAP_PRIVATE|MAP_ANON, -1, 0); 2058 if (save == MAP_FAILED) 2059 goto worst_case; 2060 if (((size_t)save & (align - 1)) == 0) 2061 return((void *)save); 2062 munmap(save, size); 2063 2064 worst_case: 2065 save = mmap(NULL, size + align, PROT_READ|PROT_WRITE, 2066 MAP_PRIVATE|MAP_ANON, -1, 0); 2067 if (save == MAP_FAILED) 2068 return NULL; 2069 2070 addr = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2071 if (save != addr) 2072 munmap(save, addr - save); 2073 if (addr + size != save + size + align) 2074 munmap(addr + size, save + align - addr); 2075 2076 addr_hint = addr + size; 2077 2078 return ((void *)addr); 2079 } 2080 2081 /* 2082 * _vmem_free() 2083 * 2084 * Free a chunk of memory allocated with _vmem_alloc() 2085 */ 2086 static void 2087 _vmem_free(void *ptr, size_t size) 2088 { 2089 munmap(ptr, size); 2090 } 2091 2092 /* 2093 * Panic on fatal conditions 2094 */ 2095 static void 2096 _mpanic(const char *ctl, ...) 2097 { 2098 va_list va; 2099 2100 if (malloc_panic == 0) { 2101 malloc_panic = 1; 2102 va_start(va, ctl); 2103 vfprintf(stderr, ctl, va); 2104 fprintf(stderr, "\n"); 2105 fflush(stderr); 2106 va_end(va); 2107 } 2108 abort(); 2109 } 2110 2111 __weak_reference(__aligned_alloc, aligned_alloc); 2112 __weak_reference(__malloc, malloc); 2113 __weak_reference(__calloc, calloc); 2114 __weak_reference(__posix_memalign, posix_memalign); 2115 __weak_reference(__realloc, realloc); 2116 __weak_reference(__free, free); 2117