1 /* 2 * NMALLOC.C - New Malloc (ported from kernel slab allocator) 3 * 4 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved. 5 * 6 * This code is derived from software contributed to The DragonFly Project 7 * by Matthew Dillon <dillon@backplane.com> and by 8 * Venkatesh Srinivas <me@endeavour.zapto.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 3. Neither the name of The DragonFly Project nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific, prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $ 38 */ 39 /* 40 * This module implements a slab allocator drop-in replacement for the 41 * libc malloc(). 42 * 43 * A slab allocator reserves a ZONE for each chunk size, then lays the 44 * chunks out in an array within the zone. Allocation and deallocation 45 * is nearly instantaneous, and overhead losses are limited to a fixed 46 * worst-case amount. 47 * 48 * The slab allocator does not have to pre-initialize the list of 49 * free chunks for each zone, and the underlying VM will not be 50 * touched at all beyond the zone header until an actual allocation 51 * needs it. 52 * 53 * Slab management and locking is done on a per-zone basis. 54 * 55 * Alloc Size Chunking Number of zones 56 * 0-127 8 16 57 * 128-255 16 8 58 * 256-511 32 8 59 * 512-1023 64 8 60 * 1024-2047 128 8 61 * 2048-4095 256 8 62 * 4096-8191 512 8 63 * 8192-16383 1024 8 64 * 16384-32767 2048 8 65 * 66 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table 67 * is used to locate for free. One and Two-page allocations use the 68 * zone mechanic to avoid excessive mmap()/munmap() calls. 69 * 70 * API FEATURES AND SIDE EFFECTS 71 * 72 * + power-of-2 sized allocations up to a page will be power-of-2 aligned. 73 * Above that power-of-2 sized allocations are page-aligned. Non 74 * power-of-2 sized allocations are aligned the same as the chunk 75 * size for their zone. 76 * + malloc(0) returns a special non-NULL value 77 * + ability to allocate arbitrarily large chunks of memory 78 * + realloc will reuse the passed pointer if possible, within the 79 * limitations of the zone chunking. 
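 *
 * A minimal usage sketch of the behaviors listed above (illustrative
 * only, not part of this file; exact rounding depends on the zone table):
 *
 *	void *p = malloc(0);	returns a special non-NULL pointer
 *	void *q = malloc(100);	rounded up to its zone's chunk size
 *	q = realloc(q, 110);	may return the same pointer when both
 *				sizes round to the same chunk
 *	free(p);
 *	free(q);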
80 * 81 * Multithreaded enhancements for small allocations introduced August 2010. 82 * These are in the spirit of 'libumem'. See: 83 * Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the 84 * slab allocator to many CPUs and arbitrary resources". In Proc. 2001 85 * USENIX Technical Conference. USENIX Association. 86 * 87 * Oversized allocations employ the BIGCACHE mechanic whereby large 88 * allocations may be handed significantly larger buffers, allowing them 89 * to avoid mmap/munmap operations even through significant realloc()s. 90 * The excess space is only trimmed if too many large allocations have been 91 * given this treatment. 92 * 93 * TUNING 94 * 95 * The value of the environment variable MALLOC_OPTIONS is a character string 96 * containing various flags to tune nmalloc. 97 * 98 * 'U' / ['u'] Generate / do not generate utrace entries for ktrace(1) 99 * This will generate utrace events for all malloc, 100 * realloc, and free calls. There are tools (mtrplay) to 101 * replay and allocation pattern or to graph heap structure 102 * (mtrgraph) which can interpret these logs. 103 * 'Z' / ['z'] Zero out / do not zero all allocations. 104 * Each new byte of memory allocated by malloc, realloc, or 105 * reallocf will be initialized to 0. This is intended for 106 * debugging and will affect performance negatively. 107 * 'H' / ['h'] Pass a hint to the kernel about pages unused by the 108 * allocation functions. 109 */ 110 111 /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */ 112 113 #include "namespace.h" 114 #include <sys/param.h> 115 #include <sys/types.h> 116 #include <sys/mman.h> 117 #include <sys/queue.h> 118 #include <sys/ktrace.h> 119 #include <stdio.h> 120 #include <stdint.h> 121 #include <stdlib.h> 122 #include <stdarg.h> 123 #include <stddef.h> 124 #include <unistd.h> 125 #include <string.h> 126 #include <fcntl.h> 127 #include <errno.h> 128 #include <pthread.h> 129 #include <machine/atomic.h> 130 #include "un-namespace.h" 131 132 #include "libc_private.h" 133 #include "spinlock.h" 134 135 void __free(void *); 136 void *__malloc(size_t); 137 void *__calloc(size_t, size_t); 138 void *__realloc(void *, size_t); 139 void *__aligned_alloc(size_t, size_t); 140 int __posix_memalign(void **, size_t, size_t); 141 142 /* 143 * Linked list of large allocations 144 */ 145 typedef struct bigalloc { 146 struct bigalloc *next; /* hash link */ 147 void *base; /* base pointer */ 148 u_long active; /* bytes active */ 149 u_long bytes; /* bytes allocated */ 150 } *bigalloc_t; 151 152 /* 153 * Note that any allocations which are exact multiples of PAGE_SIZE, or 154 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem. 155 */ 156 #define ZALLOC_ZONE_LIMIT (16 * 1024) /* max slab-managed alloc */ 157 #define ZALLOC_MIN_ZONE_SIZE (32 * 1024) /* minimum zone size */ 158 #define ZALLOC_MAX_ZONE_SIZE (128 * 1024) /* maximum zone size */ 159 #define ZALLOC_ZONE_SIZE (64 * 1024) 160 #define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ 161 #define ZALLOC_SLAB_SLIDE 20 /* L1-cache skip */ 162 163 #if ZALLOC_ZONE_LIMIT == 16384 164 #define NZONES 72 165 #elif ZALLOC_ZONE_LIMIT == 32768 166 #define NZONES 80 167 #else 168 #error "I couldn't figure out NZONES" 169 #endif 170 171 /* 172 * Chunk structure for free elements 173 */ 174 typedef struct slchunk { 175 struct slchunk *c_Next; 176 } *slchunk_t; 177 178 /* 179 * The IN-BAND zone header is placed at the beginning of each zone. 
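 *
 * Because the header is in-band and every zone is ZoneSize (64KB)
 * aligned by _vmem_alloc(), any chunk pointer can be mapped back to its
 * zone header by masking, exactly as _slabfree() and _slabrealloc() do
 * below:
 *
 *	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
 *	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);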
180 */ 181 struct slglobaldata; 182 183 typedef struct slzone { 184 int32_t z_Magic; /* magic number for sanity check */ 185 int z_NFree; /* total free chunks / ualloc space */ 186 struct slzone *z_Next; /* ZoneAry[] link if z_NFree non-zero */ 187 int z_NMax; /* maximum free chunks */ 188 char *z_BasePtr; /* pointer to start of chunk array */ 189 int z_UIndex; /* current initial allocation index */ 190 int z_UEndIndex; /* last (first) allocation index */ 191 int z_ChunkSize; /* chunk size for validation */ 192 int z_FirstFreePg; /* chunk list on a page-by-page basis */ 193 int z_ZoneIndex; 194 int z_Flags; 195 struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE]; 196 } *slzone_t; 197 198 typedef struct slglobaldata { 199 spinlock_t Spinlock; 200 slzone_t ZoneAry[NZONES];/* linked list of zones NFree > 0 */ 201 int JunkIndex; 202 } *slglobaldata_t; 203 204 #define SLZF_UNOTZEROD 0x0001 205 206 #define FASTSLABREALLOC 0x02 207 208 /* 209 * Misc constants. Note that allocations that are exact multiples of 210 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 211 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 212 */ 213 #define MIN_CHUNK_SIZE 8 /* in bytes */ 214 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 215 #define IN_SAME_PAGE_MASK (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK) 216 217 /* 218 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 219 * not be larger then 64. 220 */ 221 #define BIGHSHIFT 10 /* bigalloc hash table */ 222 #define BIGHSIZE (1 << BIGHSHIFT) 223 #define BIGHMASK (BIGHSIZE - 1) 224 #define BIGXSIZE (BIGHSIZE / 16) /* bigalloc lock table */ 225 #define BIGXMASK (BIGXSIZE - 1) 226 227 /* 228 * BIGCACHE caches oversized allocations. Note that a linear search is 229 * performed, so do not make the cache too large. 230 * 231 * BIGCACHE will garbage-collect excess space when the excess exceeds the 232 * specified value. A relatively large number should be used here because 233 * garbage collection is expensive. 234 */ 235 #define BIGCACHE 16 236 #define BIGCACHE_MASK (BIGCACHE - 1) 237 #define BIGCACHE_LIMIT (1024 * 1024) /* size limit */ 238 #define BIGCACHE_EXCESS (16 * 1024 * 1024) /* garbage collect */ 239 240 #define SAFLAG_ZERO 0x0001 241 #define SAFLAG_PASSIVE 0x0002 242 243 /* 244 * Thread control 245 */ 246 247 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 248 249 /* 250 * The assertion macros try to pretty-print assertion failures 251 * which can be caused by corruption. If a lock is held, we 252 * provide a macro that attempts to release it before asserting 253 * in order to prevent (e.g.) a reentrant SIGABRT calling malloc 254 * and deadlocking, resulting in the program freezing up. 
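 *
 * Typical use while still holding the slab spinlock (this exact call
 * appears in _slaballoc() below):
 *
 *	MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd));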
255 */ 256 #define MASSERT(exp) \ 257 do { if (__predict_false(!(exp))) \ 258 _mpanic("assertion: %s in %s", \ 259 #exp, __func__); \ 260 } while (0) 261 262 #define MASSERT_WTHUNLK(exp, unlk) \ 263 do { if (__predict_false(!(exp))) { \ 264 unlk; \ 265 _mpanic("assertion: %s in %s", \ 266 #exp, __func__); \ 267 } \ 268 } while (0) 269 270 /* 271 * Magazines 272 */ 273 274 #define M_MAX_ROUNDS 64 275 #define M_ZONE_ROUNDS 64 276 #define M_LOW_ROUNDS 32 277 #define M_INIT_ROUNDS 8 278 #define M_BURST_FACTOR 8 279 #define M_BURST_NSCALE 2 280 281 #define M_BURST 0x0001 282 #define M_BURST_EARLY 0x0002 283 284 struct magazine { 285 SLIST_ENTRY(magazine) nextmagazine; 286 287 int flags; 288 int capacity; /* Max rounds in this magazine */ 289 int rounds; /* Current number of free rounds */ 290 int burst_factor; /* Number of blocks to prefill with */ 291 int low_factor; /* Free till low_factor from full mag */ 292 void *objects[M_MAX_ROUNDS]; 293 }; 294 295 SLIST_HEAD(magazinelist, magazine); 296 297 static spinlock_t zone_mag_lock; 298 static spinlock_t depot_spinlock; 299 static struct magazine zone_magazine = { 300 .flags = M_BURST | M_BURST_EARLY, 301 .capacity = M_ZONE_ROUNDS, 302 .rounds = 0, 303 .burst_factor = M_BURST_FACTOR, 304 .low_factor = M_LOW_ROUNDS 305 }; 306 307 #define MAGAZINE_FULL(mp) (mp->rounds == mp->capacity) 308 #define MAGAZINE_NOTFULL(mp) (mp->rounds < mp->capacity) 309 #define MAGAZINE_EMPTY(mp) (mp->rounds == 0) 310 #define MAGAZINE_NOTEMPTY(mp) (mp->rounds != 0) 311 312 /* 313 * Each thread will have a pair of magazines per size-class (NZONES) 314 * The loaded magazine will support immediate allocations, the previous 315 * magazine will either be full or empty and can be swapped at need 316 */ 317 typedef struct magazine_pair { 318 struct magazine *loaded; 319 struct magazine *prev; 320 } magazine_pair; 321 322 /* A depot is a collection of magazines for a single zone. 
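 * When a thread's loaded magazine runs dry and its prev magazine cannot
 * refill it, the thread trades an empty magazine for a full one from the
 * depot (and the reverse on the free path).  A sketch of the exchange,
 * mirroring mtmagazine_alloc() below:
 *
 *	depot_lock(d);
 *	SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine);
 *	if ((mp = SLIST_FIRST(&d->full)) != NULL)
 *		SLIST_REMOVE_HEAD(&d->full, nextmagazine);
 *	depot_unlock(d);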
*/ 323 typedef struct magazine_depot { 324 struct magazinelist full; 325 struct magazinelist empty; 326 spinlock_t lock; 327 } magazine_depot; 328 329 typedef struct thr_mags { 330 magazine_pair mags[NZONES]; 331 struct magazine *newmag; 332 int init; 333 } thr_mags; 334 335 static __thread thr_mags thread_mags TLS_ATTRIBUTE; 336 static pthread_key_t thread_mags_key; 337 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT; 338 static magazine_depot depots[NZONES]; 339 340 /* 341 * Fixed globals (not per-cpu) 342 */ 343 static const int ZoneSize = ZALLOC_ZONE_SIZE; 344 static const int ZoneLimit = ZALLOC_ZONE_LIMIT; 345 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE; 346 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1; 347 348 static int opt_madvise = 0; 349 static int opt_utrace = 0; 350 static int g_malloc_flags = 0; 351 static struct slglobaldata SLGlobalData; 352 static bigalloc_t bigalloc_array[BIGHSIZE]; 353 static spinlock_t bigspin_array[BIGXSIZE]; 354 static volatile void *bigcache_array[BIGCACHE]; /* atomic swap */ 355 static volatile size_t bigcache_size_array[BIGCACHE]; /* SMP races ok */ 356 static volatile int bigcache_index; /* SMP races ok */ 357 static int malloc_panic; 358 static size_t excess_alloc; /* excess big allocs */ 359 360 static void *_slaballoc(size_t size, int flags); 361 static void *_slabrealloc(void *ptr, size_t size); 362 static void _slabfree(void *ptr, int, bigalloc_t *); 363 static int _slabmemalign(void **memptr, size_t alignment, size_t size); 364 static void *_vmem_alloc(size_t bytes, size_t align, int flags); 365 static void _vmem_free(void *ptr, size_t bytes); 366 static void *magazine_alloc(struct magazine *, int *); 367 static int magazine_free(struct magazine *, void *); 368 static void *mtmagazine_alloc(int zi); 369 static int mtmagazine_free(int zi, void *); 370 static void mtmagazine_init(void); 371 static void mtmagazine_destructor(void *); 372 static slzone_t zone_alloc(int flags); 373 static void zone_free(void *z); 374 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 375 static void malloc_init(void) __constructor(101); 376 377 struct nmalloc_utrace { 378 void *p; 379 size_t s; 380 void *r; 381 }; 382 383 #define UTRACE(a, b, c) \ 384 if (opt_utrace) { \ 385 struct nmalloc_utrace ut = { \ 386 .p = (a), \ 387 .s = (b), \ 388 .r = (c) \ 389 }; \ 390 utrace(&ut, sizeof(ut)); \ 391 } 392 393 static void 394 malloc_init(void) 395 { 396 const char *p = NULL; 397 398 if (issetugid() == 0) 399 p = getenv("MALLOC_OPTIONS"); 400 401 for (; p != NULL && *p != '\0'; p++) { 402 switch(*p) { 403 case 'u': opt_utrace = 0; break; 404 case 'U': opt_utrace = 1; break; 405 case 'h': opt_madvise = 0; break; 406 case 'H': opt_madvise = 1; break; 407 case 'z': g_malloc_flags = 0; break; 408 case 'Z': g_malloc_flags = SAFLAG_ZERO; break; 409 default: 410 break; 411 } 412 } 413 414 UTRACE((void *) -1, 0, NULL); 415 } 416 417 /* 418 * We have to install a handler for nmalloc thread teardowns when 419 * the thread is created. We cannot delay this because destructors in 420 * sophisticated userland programs can call malloc() for the first time 421 * during their thread exit. 422 * 423 * This routine is called directly from pthreads. 424 */ 425 void 426 _nmalloc_thr_init(void) 427 { 428 thr_mags *tp; 429 430 /* 431 * Disallow mtmagazine operations until the mtmagazine is 432 * initialized. 
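 * Setting tp->init to -1 first also guards any malloc() calls made
 * internally by _pthread_once() or _pthread_setspecific(): while init
 * is negative the per-thread layer is bypassed, e.g.
 *
 *	if (tp->init < 0)
 *		return(NULL);	(mtmagazine_alloc)
 *
 * and such requests fall through to the shared zone layer.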
433 */ 434 tp = &thread_mags; 435 tp->init = -1; 436 437 _pthread_once(&thread_mags_once, mtmagazine_init); 438 _pthread_setspecific(thread_mags_key, tp); 439 tp->init = 1; 440 } 441 442 void 443 _nmalloc_thr_prepfork(void) 444 { 445 if (__isthreaded) { 446 _SPINLOCK(&zone_mag_lock); 447 _SPINLOCK(&depot_spinlock); 448 } 449 } 450 451 void 452 _nmalloc_thr_parentfork(void) 453 { 454 if (__isthreaded) { 455 _SPINUNLOCK(&depot_spinlock); 456 _SPINUNLOCK(&zone_mag_lock); 457 } 458 } 459 460 void 461 _nmalloc_thr_childfork(void) 462 { 463 if (__isthreaded) { 464 _SPINUNLOCK(&depot_spinlock); 465 _SPINUNLOCK(&zone_mag_lock); 466 } 467 } 468 469 /* 470 * Handle signal reentrancy safely whether we are threaded or not. 471 * This improves the stability for mono and will probably improve 472 * stability for other high-level languages which are becoming increasingly 473 * sophisticated. 474 * 475 * The sigblockall()/sigunblockall() implementation uses a counter on 476 * a per-thread shared user/kernel page, avoids system calls, and is thus 477 * very fast. 478 */ 479 static __inline void 480 nmalloc_sigblockall(void) 481 { 482 sigblockall(); 483 } 484 485 static __inline void 486 nmalloc_sigunblockall(void) 487 { 488 sigunblockall(); 489 } 490 491 /* 492 * Thread locks. 493 */ 494 static __inline void 495 slgd_lock(slglobaldata_t slgd) 496 { 497 if (__isthreaded) 498 _SPINLOCK(&slgd->Spinlock); 499 else 500 sigblockall(); 501 } 502 503 static __inline void 504 slgd_unlock(slglobaldata_t slgd) 505 { 506 if (__isthreaded) 507 _SPINUNLOCK(&slgd->Spinlock); 508 else 509 sigunblockall(); 510 } 511 512 static __inline void 513 depot_lock(magazine_depot *dp __unused) 514 { 515 if (__isthreaded) 516 _SPINLOCK(&depot_spinlock); 517 else 518 sigblockall(); 519 #if 0 520 if (__isthreaded) 521 _SPINLOCK(&dp->lock); 522 #endif 523 } 524 525 static __inline void 526 depot_unlock(magazine_depot *dp __unused) 527 { 528 if (__isthreaded) 529 _SPINUNLOCK(&depot_spinlock); 530 else 531 sigunblockall(); 532 #if 0 533 if (__isthreaded) 534 _SPINUNLOCK(&dp->lock); 535 #endif 536 } 537 538 static __inline void 539 zone_magazine_lock(void) 540 { 541 if (__isthreaded) 542 _SPINLOCK(&zone_mag_lock); 543 else 544 sigblockall(); 545 } 546 547 static __inline void 548 zone_magazine_unlock(void) 549 { 550 if (__isthreaded) 551 _SPINUNLOCK(&zone_mag_lock); 552 else 553 sigunblockall(); 554 } 555 556 static __inline void 557 swap_mags(magazine_pair *mp) 558 { 559 struct magazine *tmp; 560 tmp = mp->loaded; 561 mp->loaded = mp->prev; 562 mp->prev = tmp; 563 } 564 565 /* 566 * bigalloc hashing and locking support. 567 * 568 * Return an unmasked hash code for the passed pointer. 569 */ 570 static __inline int 571 _bigalloc_hash(void *ptr) 572 { 573 int hv; 574 575 hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^ 576 ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT)); 577 578 return(hv); 579 } 580 581 /* 582 * Lock the hash chain and return a pointer to its base for the specified 583 * address. 584 */ 585 static __inline bigalloc_t * 586 bigalloc_lock(void *ptr) 587 { 588 int hv = _bigalloc_hash(ptr); 589 bigalloc_t *bigp; 590 591 bigp = &bigalloc_array[hv & BIGHMASK]; 592 if (__isthreaded) 593 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 594 return(bigp); 595 } 596 597 /* 598 * Lock the hash chain and return a pointer to its base for the specified 599 * address. 600 * 601 * BUT, if the hash chain is empty, just return NULL and do not bother 602 * to lock anything. 
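 *
 * Chain and lock selection sketch (the local names are only for the
 * sketch); one of the BIGXSIZE (64) spinlocks covers 16 of the
 * BIGHSIZE (1024) hash chains:
 *
 *	hv = _bigalloc_hash(ptr);
 *	chain = &bigalloc_array[hv & BIGHMASK];
 *	lock  = &bigspin_array[hv & BIGXMASK];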
603 */ 604 static __inline bigalloc_t * 605 bigalloc_check_and_lock(void *ptr) 606 { 607 int hv = _bigalloc_hash(ptr); 608 bigalloc_t *bigp; 609 610 bigp = &bigalloc_array[hv & BIGHMASK]; 611 if (*bigp == NULL) 612 return(NULL); 613 if (__isthreaded) { 614 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 615 } 616 return(bigp); 617 } 618 619 static __inline void 620 bigalloc_unlock(void *ptr) 621 { 622 int hv; 623 624 if (__isthreaded) { 625 hv = _bigalloc_hash(ptr); 626 _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]); 627 } 628 } 629 630 /* 631 * Find a bigcache entry that might work for the allocation. SMP races are 632 * ok here except for the swap (that is, it is ok if bigcache_size_array[i] 633 * is wrong or if a NULL or too-small big is returned). 634 * 635 * Generally speaking it is ok to find a large entry even if the bytes 636 * requested are relatively small (but still oversized), because we really 637 * don't know *what* the application is going to do with the buffer. 638 */ 639 static __inline 640 bigalloc_t 641 bigcache_find_alloc(size_t bytes) 642 { 643 bigalloc_t big = NULL; 644 size_t test; 645 int i; 646 647 for (i = 0; i < BIGCACHE; ++i) { 648 test = bigcache_size_array[i]; 649 if (bytes <= test) { 650 bigcache_size_array[i] = 0; 651 big = atomic_swap_ptr(&bigcache_array[i], NULL); 652 break; 653 } 654 } 655 return big; 656 } 657 658 /* 659 * Free a bigcache entry, possibly returning one that the caller really must 660 * free. This is used to cache recent oversized memory blocks. Only 661 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try 662 * to collect the biggest ones we can that are under the limit. 663 */ 664 static __inline 665 bigalloc_t 666 bigcache_find_free(bigalloc_t big) 667 { 668 int i; 669 int j; 670 int b; 671 672 b = ++bigcache_index; 673 for (i = 0; i < BIGCACHE; ++i) { 674 j = (b + i) & BIGCACHE_MASK; 675 if (bigcache_size_array[j] < big->bytes) { 676 bigcache_size_array[j] = big->bytes; 677 big = atomic_swap_ptr(&bigcache_array[j], big); 678 break; 679 } 680 } 681 return big; 682 } 683 684 static __inline 685 void 686 handle_excess_big(void) 687 { 688 int i; 689 bigalloc_t big; 690 bigalloc_t *bigp; 691 692 if (excess_alloc <= BIGCACHE_EXCESS) 693 return; 694 695 for (i = 0; i < BIGHSIZE; ++i) { 696 bigp = &bigalloc_array[i]; 697 if (*bigp == NULL) 698 continue; 699 if (__isthreaded) 700 _SPINLOCK(&bigspin_array[i & BIGXMASK]); 701 for (big = *bigp; big; big = big->next) { 702 if (big->active < big->bytes) { 703 MASSERT_WTHUNLK((big->active & PAGE_MASK) == 0, 704 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 705 MASSERT_WTHUNLK((big->bytes & PAGE_MASK) == 0, 706 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 707 munmap((char *)big->base + big->active, 708 big->bytes - big->active); 709 atomic_add_long(&excess_alloc, 710 big->active - big->bytes); 711 big->bytes = big->active; 712 } 713 } 714 if (__isthreaded) 715 _SPINUNLOCK(&bigspin_array[i & BIGXMASK]); 716 } 717 } 718 719 /* 720 * Calculate the zone index for the allocation request size and set the 721 * allocation request size to that particular zone's chunk size. 722 */ 723 static __inline int 724 zoneindex(size_t *bytes, size_t *chunking) 725 { 726 size_t n = (unsigned int)*bytes; /* unsigned for shift opt */ 727 728 /* 729 * This used to be 8-byte chunks and 16 zones for n < 128. 
730 * However some instructions may require 16-byte alignment 731 * (aka SIMD) and programs might not request an aligned size 732 * (aka GCC-7), so change this as follows: 733 * 734 * 0-15 bytes 8-byte alignment in two zones (0-1) 735 * 16-127 bytes 16-byte alignment in four zones (3-10) 736 * zone index 2 and 11-15 are currently unused. 737 */ 738 if (n < 16) { 739 *bytes = n = (n + 7) & ~7; 740 *chunking = 8; 741 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 742 /* zones 0,1, zone 2 is unused */ 743 } 744 if (n < 128) { 745 *bytes = n = (n + 15) & ~15; 746 *chunking = 16; 747 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 748 /* zones 3-10, zones 11-15 unused */ 749 } 750 if (n < 256) { 751 *bytes = n = (n + 15) & ~15; 752 *chunking = 16; 753 return(n / 16 + 7); 754 } 755 if (n < 8192) { 756 if (n < 512) { 757 *bytes = n = (n + 31) & ~31; 758 *chunking = 32; 759 return(n / 32 + 15); 760 } 761 if (n < 1024) { 762 *bytes = n = (n + 63) & ~63; 763 *chunking = 64; 764 return(n / 64 + 23); 765 } 766 if (n < 2048) { 767 *bytes = n = (n + 127) & ~127; 768 *chunking = 128; 769 return(n / 128 + 31); 770 } 771 if (n < 4096) { 772 *bytes = n = (n + 255) & ~255; 773 *chunking = 256; 774 return(n / 256 + 39); 775 } 776 *bytes = n = (n + 511) & ~511; 777 *chunking = 512; 778 return(n / 512 + 47); 779 } 780 #if ZALLOC_ZONE_LIMIT > 8192 781 if (n < 16384) { 782 *bytes = n = (n + 1023) & ~1023; 783 *chunking = 1024; 784 return(n / 1024 + 55); 785 } 786 #endif 787 #if ZALLOC_ZONE_LIMIT > 16384 788 if (n < 32768) { 789 *bytes = n = (n + 2047) & ~2047; 790 *chunking = 2048; 791 return(n / 2048 + 63); 792 } 793 #endif 794 _mpanic("Unexpected byte count %zu", n); 795 return(0); 796 } 797 798 /* 799 * malloc() - call internal slab allocator 800 */ 801 void * 802 __malloc(size_t size) 803 { 804 void *ptr; 805 806 nmalloc_sigblockall(); 807 ptr = _slaballoc(size, 0); 808 if (ptr == NULL) 809 errno = ENOMEM; 810 else 811 UTRACE(0, size, ptr); 812 nmalloc_sigunblockall(); 813 814 return(ptr); 815 } 816 817 #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) 818 819 /* 820 * calloc() - call internal slab allocator 821 */ 822 void * 823 __calloc(size_t number, size_t size) 824 { 825 void *ptr; 826 827 if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && 828 number > 0 && SIZE_MAX / number < size) { 829 errno = ENOMEM; 830 return(NULL); 831 } 832 833 nmalloc_sigblockall(); 834 ptr = _slaballoc(number * size, SAFLAG_ZERO); 835 if (ptr == NULL) 836 errno = ENOMEM; 837 else 838 UTRACE(0, number * size, ptr); 839 nmalloc_sigunblockall(); 840 841 return(ptr); 842 } 843 844 /* 845 * realloc() (SLAB ALLOCATOR) 846 * 847 * We do not attempt to optimize this routine beyond reusing the same 848 * pointer if the new size fits within the chunking of the old pointer's 849 * zone. 850 */ 851 void * 852 __realloc(void *ptr, size_t size) 853 { 854 void *ret; 855 856 nmalloc_sigblockall(); 857 ret = _slabrealloc(ptr, size); 858 if (ret == NULL) 859 errno = ENOMEM; 860 else 861 UTRACE(ptr, size, ret); 862 nmalloc_sigunblockall(); 863 864 return(ret); 865 } 866 867 /* 868 * aligned_alloc() 869 * 870 * Allocate (size) bytes with a alignment of (alignment). 
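 *
 * Usage sketch (illustrative only; the alignment must be a power of 2,
 * anything else fails with EINVAL in _slabmemalign()):
 *
 *	void *p = aligned_alloc(64, 1024);	zone path, 1024-byte aligned
 *	void *q = aligned_alloc(4096, 65536);	bigalloc path, page aligned
 *	free(p);
 *	free(q);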
871 */ 872 void * 873 __aligned_alloc(size_t alignment, size_t size) 874 { 875 void *ptr; 876 int rc; 877 878 nmalloc_sigblockall(); 879 ptr = NULL; 880 rc = _slabmemalign(&ptr, alignment, size); 881 if (rc) 882 errno = rc; 883 nmalloc_sigunblockall(); 884 885 return (ptr); 886 } 887 888 /* 889 * posix_memalign() 890 * 891 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 892 * is a power of 2 >= sizeof(void *). 893 */ 894 int 895 __posix_memalign(void **memptr, size_t alignment, size_t size) 896 { 897 int rc; 898 899 /* 900 * OpenGroup spec issue 6 check 901 */ 902 if (alignment < sizeof(void *)) { 903 *memptr = NULL; 904 return(EINVAL); 905 } 906 907 nmalloc_sigblockall(); 908 rc = _slabmemalign(memptr, alignment, size); 909 nmalloc_sigunblockall(); 910 911 return (rc); 912 } 913 914 /* 915 * The slab allocator will allocate on power-of-2 boundaries up to 916 * at least PAGE_SIZE. We use the zoneindex mechanic to find a 917 * zone matching the requirements, and _vmem_alloc() otherwise. 918 */ 919 static int 920 _slabmemalign(void **memptr, size_t alignment, size_t size) 921 { 922 bigalloc_t *bigp; 923 bigalloc_t big; 924 size_t chunking; 925 int zi __unused; 926 927 if (alignment < 1) { 928 *memptr = NULL; 929 return(EINVAL); 930 } 931 932 /* 933 * OpenGroup spec issue 6 checks 934 */ 935 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 936 *memptr = NULL; 937 return(EINVAL); 938 } 939 940 /* 941 * Our zone mechanism guarantees same-sized alignment for any 942 * power-of-2 allocation. If size is a power-of-2 and reasonable 943 * we can just call _slaballoc() and be done. We round size up 944 * to the nearest alignment boundary to improve our odds of 945 * it becoming a power-of-2 if it wasn't before. 946 */ 947 if (size <= alignment) 948 size = alignment; 949 else 950 size = (size + alignment - 1) & ~(size_t)(alignment - 1); 951 952 /* 953 * If we have overflowed above when rounding to the nearest alignment 954 * boundary, just return ENOMEM, size should be == N * sizeof(void *). 955 * 956 * Power-of-2 allocations up to 8KB will be aligned to the allocation 957 * size and _slaballoc() can simply be used. Please see line 1082 958 * for this special case: 'Align the storage in the zone based on 959 * the chunking' has a special case for powers of 2. 960 */ 961 if (size == 0) 962 return(ENOMEM); 963 964 if (size <= PAGE_SIZE*2 && (size | (size - 1)) + 1 == (size << 1)) { 965 *memptr = _slaballoc(size, 0); 966 return(*memptr ? 0 : ENOMEM); 967 } 968 969 /* 970 * Otherwise locate a zone with a chunking that matches 971 * the requested alignment, within reason. Consider two cases: 972 * 973 * (1) A 1K allocation on a 32-byte alignment. The first zoneindex 974 * we find will be the best fit because the chunking will be 975 * greater or equal to the alignment. 976 * 977 * (2) A 513 allocation on a 256-byte alignment. In this case 978 * the first zoneindex we find will be for 576 byte allocations 979 * with a chunking of 64, which is not sufficient. To fix this 980 * we simply find the nearest power-of-2 >= size and use the 981 * same side-effect of _slaballoc() which guarantees 982 * same-alignment on a power-of-2 allocation. 983 */ 984 if (size < PAGE_SIZE) { 985 zi = zoneindex(&size, &chunking); 986 if (chunking >= alignment) { 987 *memptr = _slaballoc(size, 0); 988 return(*memptr ? 
0 : ENOMEM); 989 } 990 if (size >= 1024) 991 alignment = 1024; 992 if (size >= 16384) 993 alignment = 16384; 994 while (alignment < size) 995 alignment <<= 1; 996 *memptr = _slaballoc(alignment, 0); 997 return(*memptr ? 0 : ENOMEM); 998 } 999 1000 /* 1001 * If the slab allocator cannot handle it use vmem_alloc(). 1002 * 1003 * Alignment must be adjusted up to at least PAGE_SIZE in this case. 1004 */ 1005 if (alignment < PAGE_SIZE) 1006 alignment = PAGE_SIZE; 1007 if (size < alignment) 1008 size = alignment; 1009 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1010 if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) { 1011 big = bigcache_find_alloc(size); 1012 if (big && big->bytes < size) { 1013 _slabfree(big->base, FASTSLABREALLOC, &big); 1014 big = NULL; 1015 } 1016 if (big) { 1017 *memptr = big->base; 1018 big->active = size; 1019 if (big->active < big->bytes) { 1020 atomic_add_long(&excess_alloc, 1021 big->bytes - big->active); 1022 } 1023 bigp = bigalloc_lock(*memptr); 1024 big->next = *bigp; 1025 *bigp = big; 1026 bigalloc_unlock(*memptr); 1027 handle_excess_big(); 1028 return(0); 1029 } 1030 } 1031 *memptr = _vmem_alloc(size, alignment, 0); 1032 if (*memptr == NULL) 1033 return(ENOMEM); 1034 1035 big = _slaballoc(sizeof(struct bigalloc), 0); 1036 if (big == NULL) { 1037 _vmem_free(*memptr, size); 1038 *memptr = NULL; 1039 return(ENOMEM); 1040 } 1041 bigp = bigalloc_lock(*memptr); 1042 big->base = *memptr; 1043 big->active = size; 1044 big->bytes = size; /* no excess */ 1045 big->next = *bigp; 1046 *bigp = big; 1047 bigalloc_unlock(*memptr); 1048 1049 return(0); 1050 } 1051 1052 /* 1053 * free() (SLAB ALLOCATOR) - do the obvious 1054 */ 1055 void 1056 __free(void *ptr) 1057 { 1058 UTRACE(ptr, 0, 0); 1059 nmalloc_sigblockall(); 1060 _slabfree(ptr, 0, NULL); 1061 nmalloc_sigunblockall(); 1062 } 1063 1064 /* 1065 * _slaballoc() (SLAB ALLOCATOR) 1066 * 1067 * Allocate memory via the slab allocator. If the request is too large, 1068 * or if it page-aligned beyond a certain size, we fall back to the 1069 * KMEM subsystem 1070 */ 1071 static void * 1072 _slaballoc(size_t size, int flags) 1073 { 1074 slzone_t z; 1075 slchunk_t chunk; 1076 slglobaldata_t slgd; 1077 size_t chunking; 1078 int zi; 1079 int off; 1080 void *obj; 1081 1082 /* 1083 * Handle the degenerate size == 0 case. Yes, this does happen. 1084 * Return a special pointer. This is to maintain compatibility with 1085 * the original malloc implementation. Certain devices, such as the 1086 * adaptec driver, not only allocate 0 bytes, they check for NULL and 1087 * also realloc() later on. Joy. 1088 */ 1089 if (size == 0) 1090 size = 1; 1091 1092 /* Capture global flags */ 1093 flags |= g_malloc_flags; 1094 1095 /* 1096 * Handle large allocations directly. There should not be very many 1097 * of these so performance is not a big issue. 1098 * 1099 * The backend allocator is pretty nasty on a SMP system. Use the 1100 * slab allocator for one and two page-sized chunks even though we 1101 * lose some efficiency. 1102 * 1103 * NOTE: Please see posix_memalign around line 864, which assumes 1104 * that power-of-2 allocations of PAGE_SIZE and PAGE_SIZE*2 1105 * can use _slaballoc() and be aligned to the same. The 1106 * zone cache can be used for this case, bigalloc does not 1107 * have to be used. 
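 *
 * Size routing, summarized (illustrative; the exact test is the
 * conditional that follows):
 *
 *	size >= ZoneLimit (16K)			bigalloc via mmap
 *	page-multiple sizes above PAGE_SIZE*2	bigalloc via mmap
 *	everything else				zone / magazine layer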
1108 */ 1109 if (size >= ZoneLimit || 1110 ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) { 1111 bigalloc_t big; 1112 bigalloc_t *bigp; 1113 1114 /* 1115 * Page-align and cache-color in case of virtually indexed 1116 * physically tagged L1 caches (aka SandyBridge). No sweat 1117 * otherwise, so just do it. 1118 * 1119 * (don't count as excess). 1120 */ 1121 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1122 1123 /* 1124 * If we have overflowed above when rounding to the page 1125 * boundary, something has passed us (size_t)[-PAGE_MASK..-1] 1126 * so just return NULL, size at this point should be >= 0. 1127 */ 1128 if (size == 0) 1129 return (NULL); 1130 1131 if ((size & (PAGE_SIZE * 2 - 1)) == 0) 1132 size += PAGE_SIZE; 1133 1134 /* 1135 * Try to reuse a cached big block to avoid mmap'ing. If it 1136 * turns out not to fit our requirements we throw it away 1137 * and allocate normally. 1138 */ 1139 big = NULL; 1140 if (size <= BIGCACHE_LIMIT) { 1141 big = bigcache_find_alloc(size); 1142 if (big && big->bytes < size) { 1143 _slabfree(big->base, FASTSLABREALLOC, &big); 1144 big = NULL; 1145 } 1146 } 1147 if (big) { 1148 chunk = big->base; 1149 if (flags & SAFLAG_ZERO) 1150 bzero(chunk, size); 1151 } else { 1152 chunk = _vmem_alloc(size, PAGE_SIZE, flags); 1153 if (chunk == NULL) 1154 return(NULL); 1155 1156 big = _slaballoc(sizeof(struct bigalloc), 0); 1157 if (big == NULL) { 1158 _vmem_free(chunk, size); 1159 return(NULL); 1160 } 1161 big->base = chunk; 1162 big->bytes = size; 1163 } 1164 big->active = size; 1165 1166 bigp = bigalloc_lock(chunk); 1167 if (big->active < big->bytes) { 1168 atomic_add_long(&excess_alloc, 1169 big->bytes - big->active); 1170 } 1171 big->next = *bigp; 1172 *bigp = big; 1173 bigalloc_unlock(chunk); 1174 handle_excess_big(); 1175 1176 return(chunk); 1177 } 1178 1179 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 1180 zi = zoneindex(&size, &chunking); 1181 MASSERT(zi < NZONES); 1182 1183 obj = mtmagazine_alloc(zi); 1184 if (obj != NULL) { 1185 if (flags & SAFLAG_ZERO) 1186 bzero(obj, size); 1187 return (obj); 1188 } 1189 1190 slgd = &SLGlobalData; 1191 slgd_lock(slgd); 1192 1193 /* 1194 * Attempt to allocate out of an existing zone. If all zones are 1195 * exhausted pull one off the free list or allocate a new one. 1196 */ 1197 if ((z = slgd->ZoneAry[zi]) == NULL) { 1198 z = zone_alloc(flags); 1199 if (z == NULL) 1200 goto fail; 1201 1202 /* 1203 * How big is the base structure? 1204 */ 1205 off = sizeof(struct slzone); 1206 1207 /* 1208 * Align the storage in the zone based on the chunking. 1209 * 1210 * Guarantee power-of-2 alignment for power-of-2-sized 1211 * chunks. Otherwise align based on the chunking size 1212 * (typically 8 or 16 bytes for small allocations). 1213 * 1214 * NOTE: Allocations >= ZoneLimit are governed by the 1215 * bigalloc code and typically only guarantee page-alignment. 1216 * 1217 * Set initial conditions for UIndex near the zone header 1218 * to reduce unecessary page faults, vs semi-randomization 1219 * to improve L1 cache saturation. 1220 * 1221 * NOTE: Please see posix_memalign around line 864-ish, which 1222 * assumes that power-of-2 allocations of PAGE_SIZE 1223 * and PAGE_SIZE*2 can use _slaballoc() and be aligned 1224 * to the same. The zone cache can be used for this 1225 * case, bigalloc does not have to be used. 1226 * 1227 * ALL power-of-2 requests that fall through to this 1228 * code use this rule (conditionals above limit this 1229 * to <= PAGE_SIZE*2. 
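 *
 * Worked example (illustrative): for the 1024-byte zone the header
 * offset is rounded up to 1024, so every chunk starts on a 1024-byte
 * boundary; for the 48-byte zone (not a power of 2) the header is
 * only rounded up to the 16-byte chunking.  The power-of-2 test used
 * below is
 *
 *	(size | (size - 1)) + 1 == (size << 1)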
1230 */ 1231 if ((size | (size - 1)) + 1 == (size << 1)) 1232 off = roundup2(off, size); 1233 else 1234 off = roundup2(off, chunking); 1235 z->z_Magic = ZALLOC_SLAB_MAGIC; 1236 z->z_ZoneIndex = zi; 1237 z->z_NMax = (ZoneSize - off) / size; 1238 z->z_NFree = z->z_NMax; 1239 z->z_BasePtr = (char *)z + off; 1240 z->z_UIndex = z->z_UEndIndex = 0; 1241 z->z_ChunkSize = size; 1242 z->z_FirstFreePg = ZonePageCount; 1243 z->z_Next = slgd->ZoneAry[zi]; 1244 slgd->ZoneAry[zi] = z; 1245 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1246 flags &= ~SAFLAG_ZERO; /* already zero'd */ 1247 flags |= SAFLAG_PASSIVE; 1248 } 1249 1250 /* 1251 * Slide the base index for initial allocations out of the 1252 * next zone we create so we do not over-weight the lower 1253 * part of the cpu memory caches. 1254 */ 1255 slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE) 1256 & (ZALLOC_MAX_ZONE_SIZE - 1); 1257 } 1258 1259 /* 1260 * Ok, we have a zone from which at least one chunk is available. 1261 * 1262 * Remove us from the ZoneAry[] when we become empty 1263 */ 1264 MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd)); 1265 1266 if (--z->z_NFree == 0) { 1267 slgd->ZoneAry[zi] = z->z_Next; 1268 z->z_Next = NULL; 1269 } 1270 1271 /* 1272 * Locate a chunk in a free page. This attempts to localize 1273 * reallocations into earlier pages without us having to sort 1274 * the chunk list. A chunk may still overlap a page boundary. 1275 */ 1276 while (z->z_FirstFreePg < ZonePageCount) { 1277 if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { 1278 if (((uintptr_t)chunk & ZoneMask) == 0) { 1279 slgd_unlock(slgd); 1280 _mpanic("assertion: corrupt malloc zone"); 1281 } 1282 z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; 1283 goto done; 1284 } 1285 ++z->z_FirstFreePg; 1286 } 1287 1288 /* 1289 * No chunks are available but NFree said we had some memory, 1290 * so it must be available in the never-before-used-memory 1291 * area governed by UIndex. The consequences are very 1292 * serious if our zone got corrupted so we use an explicit 1293 * panic rather then a KASSERT. 1294 */ 1295 chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size); 1296 1297 if (++z->z_UIndex == z->z_NMax) 1298 z->z_UIndex = 0; 1299 if (z->z_UIndex == z->z_UEndIndex) { 1300 if (z->z_NFree != 0) 1301 _mpanic("slaballoc: corrupted zone"); 1302 } 1303 1304 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1305 flags &= ~SAFLAG_ZERO; 1306 flags |= SAFLAG_PASSIVE; 1307 } 1308 1309 done: 1310 slgd_unlock(slgd); 1311 if (flags & SAFLAG_ZERO) 1312 bzero(chunk, size); 1313 return(chunk); 1314 fail: 1315 slgd_unlock(slgd); 1316 return(NULL); 1317 } 1318 1319 /* 1320 * Reallocate memory within the chunk 1321 */ 1322 static void * 1323 _slabrealloc(void *ptr, size_t size) 1324 { 1325 bigalloc_t *bigp; 1326 void *nptr; 1327 slzone_t z; 1328 size_t chunking; 1329 1330 if (ptr == NULL) { 1331 return(_slaballoc(size, 0)); 1332 } 1333 1334 if (size == 0) 1335 size = 1; 1336 1337 /* 1338 * Handle oversized allocations. 1339 */ 1340 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1341 bigalloc_t big; 1342 size_t bigbytes; 1343 1344 while ((big = *bigp) != NULL) { 1345 if (big->base == ptr) { 1346 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1347 bigbytes = big->bytes; 1348 1349 /* 1350 * If it already fits determine if it makes 1351 * sense to shrink/reallocate. Try to optimize 1352 * programs which stupidly make incremental 1353 * reallocations larger or smaller by scaling 1354 * the allocation. Also deal with potential 1355 * coloring. 
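 *
 * In-place reuse rule (sketch of the test just below): the existing
 * mapping is kept whenever the new size is between half of and the
 * full current size, i.e. (bigbytes >> 1) <= size <= bigbytes; only
 * big->active and the excess_alloc counter are adjusted in that case.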
1356 */ 1357 if (size >= (bigbytes >> 1) && 1358 size <= bigbytes) { 1359 if (big->active != size) { 1360 atomic_add_long(&excess_alloc, 1361 big->active - 1362 size); 1363 } 1364 big->active = size; 1365 bigalloc_unlock(ptr); 1366 return(ptr); 1367 } 1368 1369 /* 1370 * For large reallocations, allocate more space 1371 * than we need to try to avoid excessive 1372 * reallocations later on. 1373 */ 1374 chunking = size + (size >> 3); 1375 chunking = (chunking + PAGE_MASK) & 1376 ~(size_t)PAGE_MASK; 1377 1378 /* 1379 * Try to allocate adjacently in case the 1380 * program is idiotically realloc()ing a 1381 * huge memory block just slightly bigger. 1382 * (llvm's llc tends to do this a lot). 1383 * 1384 * (MAP_TRYFIXED forces mmap to fail if there 1385 * is already something at the address). 1386 */ 1387 if (chunking > bigbytes) { 1388 char *addr; 1389 int errno_save = errno; 1390 1391 addr = mmap((char *)ptr + bigbytes, 1392 chunking - bigbytes, 1393 PROT_READ|PROT_WRITE, 1394 MAP_PRIVATE|MAP_ANON| 1395 MAP_TRYFIXED, 1396 -1, 0); 1397 errno = errno_save; 1398 if (addr == (char *)ptr + bigbytes) { 1399 atomic_add_long(&excess_alloc, 1400 big->active - 1401 big->bytes + 1402 chunking - 1403 size); 1404 big->bytes = chunking; 1405 big->active = size; 1406 bigalloc_unlock(ptr); 1407 1408 return(ptr); 1409 } 1410 MASSERT_WTHUNLK( 1411 (void *)addr == MAP_FAILED, 1412 bigalloc_unlock(ptr)); 1413 } 1414 1415 /* 1416 * Failed, unlink big and allocate fresh. 1417 * (note that we have to leave (big) intact 1418 * in case the slaballoc fails). 1419 */ 1420 *bigp = big->next; 1421 bigalloc_unlock(ptr); 1422 if ((nptr = _slaballoc(size, 0)) == NULL) { 1423 /* Relink block */ 1424 bigp = bigalloc_lock(ptr); 1425 big->next = *bigp; 1426 *bigp = big; 1427 bigalloc_unlock(ptr); 1428 return(NULL); 1429 } 1430 if (size > bigbytes) 1431 size = bigbytes; 1432 bcopy(ptr, nptr, size); 1433 atomic_add_long(&excess_alloc, big->active - 1434 big->bytes); 1435 _slabfree(ptr, FASTSLABREALLOC, &big); 1436 1437 return(nptr); 1438 } 1439 bigp = &big->next; 1440 } 1441 bigalloc_unlock(ptr); 1442 handle_excess_big(); 1443 } 1444 1445 /* 1446 * Get the original allocation's zone. If the new request winds 1447 * up using the same chunk size we do not have to do anything. 1448 * 1449 * NOTE: We don't have to lock the globaldata here, the fields we 1450 * access here will not change at least as long as we have control 1451 * over the allocation. 1452 */ 1453 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1454 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1455 1456 /* 1457 * Use zoneindex() to chunk-align the new size, as long as the 1458 * new size is not too large. 1459 */ 1460 if (size < ZoneLimit) { 1461 zoneindex(&size, &chunking); 1462 if (z->z_ChunkSize == size) { 1463 return(ptr); 1464 } 1465 } 1466 1467 /* 1468 * Allocate memory for the new request size and copy as appropriate. 1469 */ 1470 if ((nptr = _slaballoc(size, 0)) != NULL) { 1471 if (size > z->z_ChunkSize) 1472 size = z->z_ChunkSize; 1473 bcopy(ptr, nptr, size); 1474 _slabfree(ptr, 0, NULL); 1475 } 1476 1477 return(nptr); 1478 } 1479 1480 /* 1481 * free (SLAB ALLOCATOR) 1482 * 1483 * Free a memory block previously allocated by malloc. Note that we do not 1484 * attempt to uplodate ks_loosememuse as MP races could prevent us from 1485 * checking memory limits in malloc. 1486 * 1487 * flags: 1488 * FASTSLABREALLOC Fast call from realloc, *rbigp already 1489 * unlinked. 
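 *				All callers pass an already-unlinked
 *				record, e.g. from the realloc path above:
 *				_slabfree(big->base, FASTSLABREALLOC, &big);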
1490 * 1491 * MPSAFE 1492 */ 1493 static void 1494 _slabfree(void *ptr, int flags, bigalloc_t *rbigp) 1495 { 1496 slzone_t z; 1497 slchunk_t chunk; 1498 bigalloc_t big; 1499 bigalloc_t *bigp; 1500 slglobaldata_t slgd; 1501 size_t size; 1502 int zi; 1503 int pgno; 1504 1505 /* Fast realloc path for big allocations */ 1506 if (flags & FASTSLABREALLOC) { 1507 big = *rbigp; 1508 goto fastslabrealloc; 1509 } 1510 1511 /* 1512 * Handle NULL frees and special 0-byte allocations 1513 */ 1514 if (ptr == NULL) 1515 return; 1516 1517 /* 1518 * Handle oversized allocations. 1519 */ 1520 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1521 while ((big = *bigp) != NULL) { 1522 if (big->base == ptr) { 1523 *bigp = big->next; 1524 atomic_add_long(&excess_alloc, big->active - 1525 big->bytes); 1526 bigalloc_unlock(ptr); 1527 1528 /* 1529 * Try to stash the block we are freeing, 1530 * potentially receiving another block in 1531 * return which must be freed. 1532 */ 1533 fastslabrealloc: 1534 if (big->bytes <= BIGCACHE_LIMIT) { 1535 big = bigcache_find_free(big); 1536 if (big == NULL) 1537 return; 1538 } 1539 ptr = big->base; /* reload */ 1540 size = big->bytes; 1541 _slabfree(big, 0, NULL); 1542 _vmem_free(ptr, size); 1543 return; 1544 } 1545 bigp = &big->next; 1546 } 1547 bigalloc_unlock(ptr); 1548 handle_excess_big(); 1549 } 1550 1551 /* 1552 * Zone case. Figure out the zone based on the fact that it is 1553 * ZoneSize aligned. 1554 */ 1555 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1556 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1557 1558 size = z->z_ChunkSize; 1559 zi = z->z_ZoneIndex; 1560 1561 if (g_malloc_flags & SAFLAG_ZERO) 1562 bzero(ptr, size); 1563 1564 if (mtmagazine_free(zi, ptr) == 0) 1565 return; 1566 1567 pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; 1568 chunk = ptr; 1569 slgd = &SLGlobalData; 1570 slgd_lock(slgd); 1571 1572 /* 1573 * Add this free non-zero'd chunk to a linked list for reuse, adjust 1574 * z_FirstFreePg. 1575 */ 1576 chunk->c_Next = z->z_PageAry[pgno]; 1577 z->z_PageAry[pgno] = chunk; 1578 if (z->z_FirstFreePg > pgno) 1579 z->z_FirstFreePg = pgno; 1580 1581 /* 1582 * Bump the number of free chunks. If it becomes non-zero the zone 1583 * must be added back onto the appropriate list. 1584 */ 1585 if (z->z_NFree++ == 0) { 1586 z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; 1587 slgd->ZoneAry[z->z_ZoneIndex] = z; 1588 } 1589 1590 /* 1591 * If the zone becomes totally free then release it. 1592 */ 1593 if (z->z_NFree == z->z_NMax) { 1594 slzone_t *pz; 1595 1596 pz = &slgd->ZoneAry[z->z_ZoneIndex]; 1597 while (z != *pz) 1598 pz = &(*pz)->z_Next; 1599 *pz = z->z_Next; 1600 z->z_Magic = -1; 1601 z->z_Next = NULL; 1602 zone_free(z); 1603 /* slgd lock released */ 1604 return; 1605 } 1606 slgd_unlock(slgd); 1607 } 1608 1609 /* 1610 * Allocate and return a magazine. NULL is returned and *burst is adjusted 1611 * if the magazine is empty. 
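 *
 * The burst value is used by the zone magazine: zone_alloc() below
 * starts with a burst of 1 and, when the magazine comes up empty with
 * a larger burst, mmap()s that many zones at once and returns the
 * extras to the magazine.  Roughly:
 *
 *	int burst = 1;
 *	z = magazine_alloc(&zone_magazine, &burst);
 *	if (z == NULL && burst > 1) {
 *		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
 *		(zones 1..burst-1 are handed back via magazine_free())
 *	}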
1612 */ 1613 static __inline void * 1614 magazine_alloc(struct magazine *mp, int *burst) 1615 { 1616 void *obj; 1617 1618 if (mp == NULL) 1619 return(NULL); 1620 if (MAGAZINE_NOTEMPTY(mp)) { 1621 obj = mp->objects[--mp->rounds]; 1622 return(obj); 1623 } 1624 1625 /* 1626 * Return burst factor to caller along with NULL 1627 */ 1628 if ((mp->flags & M_BURST) && (burst != NULL)) { 1629 *burst = mp->burst_factor; 1630 } 1631 /* Reduce burst factor by NSCALE; if it hits 1, disable BURST */ 1632 if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) && 1633 (burst != NULL)) { 1634 mp->burst_factor -= M_BURST_NSCALE; 1635 if (mp->burst_factor <= 1) { 1636 mp->burst_factor = 1; 1637 mp->flags &= ~(M_BURST); 1638 mp->flags &= ~(M_BURST_EARLY); 1639 } 1640 } 1641 return (NULL); 1642 } 1643 1644 static __inline int 1645 magazine_free(struct magazine *mp, void *p) 1646 { 1647 if (mp != NULL && MAGAZINE_NOTFULL(mp)) { 1648 mp->objects[mp->rounds++] = p; 1649 return 0; 1650 } 1651 1652 return -1; 1653 } 1654 1655 static void * 1656 mtmagazine_alloc(int zi) 1657 { 1658 thr_mags *tp; 1659 struct magazine *mp, *emptymag; 1660 magazine_depot *d; 1661 void *obj; 1662 1663 /* 1664 * Do not try to access per-thread magazines while the mtmagazine 1665 * is being initialized or destroyed. 1666 */ 1667 tp = &thread_mags; 1668 if (tp->init < 0) 1669 return(NULL); 1670 1671 /* 1672 * Primary per-thread allocation loop 1673 */ 1674 nmalloc_sigblockall(); 1675 for (;;) { 1676 /* 1677 * If the loaded magazine has rounds, allocate and return 1678 */ 1679 mp = tp->mags[zi].loaded; 1680 obj = magazine_alloc(mp, NULL); 1681 if (obj) 1682 break; 1683 1684 /* 1685 * If the prev magazine is full, swap with the loaded 1686 * magazine and retry. 1687 */ 1688 mp = tp->mags[zi].prev; 1689 if (mp && MAGAZINE_FULL(mp)) { 1690 MASSERT(mp->rounds != 0); 1691 swap_mags(&tp->mags[zi]); /* prev now empty */ 1692 continue; 1693 } 1694 1695 /* 1696 * Try to get a full magazine from the depot. Cycle 1697 * through depot(full)->loaded->prev->depot(empty). 1698 * Retry if a full magazine was available from the depot. 1699 * 1700 * Return NULL (caller will fall through) if no magazines 1701 * can be found anywhere. 1702 */ 1703 d = &depots[zi]; 1704 depot_lock(d); 1705 emptymag = tp->mags[zi].prev; 1706 if (emptymag) 1707 SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine); 1708 tp->mags[zi].prev = tp->mags[zi].loaded; 1709 mp = SLIST_FIRST(&d->full); /* loaded magazine */ 1710 tp->mags[zi].loaded = mp; 1711 if (mp) { 1712 SLIST_REMOVE_HEAD(&d->full, nextmagazine); 1713 depot_unlock(d); 1714 MASSERT(MAGAZINE_NOTEMPTY(mp)); 1715 continue; 1716 } 1717 depot_unlock(d); 1718 break; 1719 } 1720 nmalloc_sigunblockall(); 1721 1722 return (obj); 1723 } 1724 1725 static int 1726 mtmagazine_free(int zi, void *ptr) 1727 { 1728 thr_mags *tp; 1729 struct magazine *mp, *loadedmag; 1730 magazine_depot *d; 1731 int rc = -1; 1732 1733 /* 1734 * Do not try to access per-thread magazines while the mtmagazine 1735 * is being initialized or destroyed. 1736 */ 1737 tp = &thread_mags; 1738 if (tp->init < 0) 1739 return(-1); 1740 1741 /* 1742 * Primary per-thread freeing loop 1743 */ 1744 nmalloc_sigblockall(); 1745 for (;;) { 1746 /* 1747 * Make sure a new magazine is available in case we have 1748 * to use it. Staging the newmag allows us to avoid 1749 * some locking/reentrancy complexity. 
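 * (If the depot turns out to have no empty magazine either, the staged
 * tp->newmag is taken over further below and initialized with capacity
 * M_MAX_ROUNDS and zero rounds before becoming the loaded magazine.)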
1750 * 1751 * Temporarily disable the per-thread caches for this 1752 * allocation to avoid reentrancy and/or to avoid a 1753 * stack overflow if the [zi] happens to be the same that 1754 * would be used to allocate the new magazine. 1755 */ 1756 if (tp->newmag == NULL) { 1757 tp->init = -1; 1758 tp->newmag = _slaballoc(sizeof(struct magazine), 1759 SAFLAG_ZERO); 1760 tp->init = 1; 1761 if (tp->newmag == NULL) { 1762 rc = -1; 1763 break; 1764 } 1765 } 1766 1767 /* 1768 * If the loaded magazine has space, free directly to it 1769 */ 1770 rc = magazine_free(tp->mags[zi].loaded, ptr); 1771 if (rc == 0) 1772 break; 1773 1774 /* 1775 * If the prev magazine is empty, swap with the loaded 1776 * magazine and retry. 1777 */ 1778 mp = tp->mags[zi].prev; 1779 if (mp && MAGAZINE_EMPTY(mp)) { 1780 MASSERT(mp->rounds == 0); 1781 swap_mags(&tp->mags[zi]); /* prev now full */ 1782 continue; 1783 } 1784 1785 /* 1786 * Try to get an empty magazine from the depot. Cycle 1787 * through depot(empty)->loaded->prev->depot(full). 1788 * Retry if an empty magazine was available from the depot. 1789 */ 1790 d = &depots[zi]; 1791 depot_lock(d); 1792 1793 if ((loadedmag = tp->mags[zi].prev) != NULL) 1794 SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine); 1795 tp->mags[zi].prev = tp->mags[zi].loaded; 1796 mp = SLIST_FIRST(&d->empty); 1797 if (mp) { 1798 tp->mags[zi].loaded = mp; 1799 SLIST_REMOVE_HEAD(&d->empty, nextmagazine); 1800 depot_unlock(d); 1801 MASSERT(MAGAZINE_NOTFULL(mp)); 1802 } else { 1803 mp = tp->newmag; 1804 tp->newmag = NULL; 1805 mp->capacity = M_MAX_ROUNDS; 1806 mp->rounds = 0; 1807 mp->flags = 0; 1808 tp->mags[zi].loaded = mp; 1809 depot_unlock(d); 1810 } 1811 } 1812 nmalloc_sigunblockall(); 1813 1814 return rc; 1815 } 1816 1817 static void 1818 mtmagazine_init(void) 1819 { 1820 int error; 1821 1822 error = _pthread_key_create(&thread_mags_key, mtmagazine_destructor); 1823 if (error) 1824 abort(); 1825 } 1826 1827 /* 1828 * This function is only used by the thread exit destructor 1829 */ 1830 static void 1831 mtmagazine_drain(struct magazine *mp) 1832 { 1833 void *obj; 1834 1835 while (MAGAZINE_NOTEMPTY(mp)) { 1836 obj = magazine_alloc(mp, NULL); 1837 _slabfree(obj, 0, NULL); 1838 } 1839 } 1840 1841 /* 1842 * mtmagazine_destructor() 1843 * 1844 * When a thread exits, we reclaim all its resources; all its magazines are 1845 * drained and the structures are freed. 1846 * 1847 * WARNING! The destructor can be called multiple times if the larger user 1848 * program has its own destructors which run after ours which 1849 * allocate or free memory. 1850 */ 1851 static void 1852 mtmagazine_destructor(void *thrp) 1853 { 1854 thr_mags *tp = thrp; 1855 struct magazine *mp; 1856 int i; 1857 1858 /* 1859 * Prevent further use of mtmagazines while we are destructing 1860 * them, as well as for any destructors which are run after us 1861 * prior to the thread actually being destroyed. 
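 *
 * With tp->init forced to -1, any malloc()/free() performed by those
 * later destructors bypasses the per-thread magazines (mtmagazine_alloc
 * and mtmagazine_free return early) and is served by the shared zone
 * layer instead.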
1862 */ 1863 tp->init = -1; 1864 1865 for (i = 0; i < NZONES; i++) { 1866 mp = tp->mags[i].loaded; 1867 tp->mags[i].loaded = NULL; 1868 if (mp) { 1869 if (MAGAZINE_NOTEMPTY(mp)) 1870 mtmagazine_drain(mp); 1871 _slabfree(mp, 0, NULL); 1872 } 1873 1874 mp = tp->mags[i].prev; 1875 tp->mags[i].prev = NULL; 1876 if (mp) { 1877 if (MAGAZINE_NOTEMPTY(mp)) 1878 mtmagazine_drain(mp); 1879 _slabfree(mp, 0, NULL); 1880 } 1881 } 1882 1883 if (tp->newmag) { 1884 mp = tp->newmag; 1885 tp->newmag = NULL; 1886 _slabfree(mp, 0, NULL); 1887 } 1888 } 1889 1890 /* 1891 * zone_alloc() 1892 * 1893 * Attempt to allocate a zone from the zone magazine; the zone magazine has 1894 * M_BURST_EARLY enabled, so honor the burst request from the magazine. 1895 */ 1896 static slzone_t 1897 zone_alloc(int flags) 1898 { 1899 slglobaldata_t slgd = &SLGlobalData; 1900 int burst = 1; 1901 int i, j; 1902 slzone_t z; 1903 1904 zone_magazine_lock(); 1905 slgd_unlock(slgd); 1906 1907 z = magazine_alloc(&zone_magazine, &burst); 1908 if (z == NULL && burst == 1) { 1909 zone_magazine_unlock(); 1910 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1911 } else if (z == NULL) { 1912 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1913 if (z) { 1914 for (i = 1; i < burst; i++) { 1915 j = magazine_free(&zone_magazine, 1916 (char *) z + (ZoneSize * i)); 1917 MASSERT_WTHUNLK(j == 0, zone_magazine_unlock()); 1918 } 1919 } 1920 zone_magazine_unlock(); 1921 } else { 1922 z->z_Flags |= SLZF_UNOTZEROD; 1923 zone_magazine_unlock(); 1924 } 1925 slgd_lock(slgd); 1926 return z; 1927 } 1928 1929 /* 1930 * zone_free() 1931 * 1932 * Release a zone and unlock the slgd lock. 1933 */ 1934 static void 1935 zone_free(void *z) 1936 { 1937 slglobaldata_t slgd = &SLGlobalData; 1938 void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {}; 1939 int i, j; 1940 1941 zone_magazine_lock(); 1942 slgd_unlock(slgd); 1943 1944 bzero(z, sizeof(struct slzone)); 1945 1946 if (opt_madvise) 1947 madvise(z, ZoneSize, MADV_FREE); 1948 1949 i = magazine_free(&zone_magazine, z); 1950 1951 /* 1952 * If we failed to free, collect excess magazines; release the zone 1953 * magazine lock, and then free to the system via _vmem_free. Re-enable 1954 * BURST mode for the magazine. 1955 */ 1956 if (i == -1) { 1957 j = zone_magazine.rounds - zone_magazine.low_factor; 1958 for (i = 0; i < j; i++) { 1959 excess[i] = magazine_alloc(&zone_magazine, NULL); 1960 MASSERT_WTHUNLK(excess[i] != NULL, 1961 zone_magazine_unlock()); 1962 } 1963 1964 zone_magazine_unlock(); 1965 1966 for (i = 0; i < j; i++) 1967 _vmem_free(excess[i], ZoneSize); 1968 1969 _vmem_free(z, ZoneSize); 1970 } else { 1971 zone_magazine_unlock(); 1972 } 1973 } 1974 1975 /* 1976 * _vmem_alloc() 1977 * 1978 * Directly map memory in PAGE_SIZE'd chunks with the specified 1979 * alignment. 1980 * 1981 * Alignment must be a multiple of PAGE_SIZE. 1982 * 1983 * Size must be >= alignment. 1984 */ 1985 static void * 1986 _vmem_alloc(size_t size, size_t align, int flags) 1987 { 1988 static char *addr_hint; 1989 static int reset_hint = 16; 1990 char *addr; 1991 char *save; 1992 1993 if (--reset_hint <= 0) { 1994 addr_hint = NULL; 1995 reset_hint = 16; 1996 } 1997 1998 /* 1999 * Map anonymous private memory. 
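 * Three attempts are made (see the code below): a plain mmap() kept if
 * it happens to satisfy the alignment, a second try at the rounded-up
 * hint address, and finally a mapping of size + align bytes from which
 * the unaligned head and tail are trimmed (the trims are skipped when
 * they would be zero-length):
 *
 *	addr = (save + align - 1) & ~(align - 1);
 *	munmap(save, addr - save);			leading slop
 *	munmap(addr + size, save + align - addr);	trailing slop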
2000 */ 2001 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2002 MAP_PRIVATE|MAP_ANON, -1, 0); 2003 if (save == MAP_FAILED) 2004 goto worst_case; 2005 if (((uintptr_t)save & (align - 1)) == 0) 2006 return((void *)save); 2007 2008 addr_hint = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2009 munmap(save, size); 2010 2011 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2012 MAP_PRIVATE|MAP_ANON, -1, 0); 2013 if (save == MAP_FAILED) 2014 goto worst_case; 2015 if (((size_t)save & (align - 1)) == 0) 2016 return((void *)save); 2017 munmap(save, size); 2018 2019 worst_case: 2020 save = mmap(NULL, size + align, PROT_READ|PROT_WRITE, 2021 MAP_PRIVATE|MAP_ANON, -1, 0); 2022 if (save == MAP_FAILED) 2023 return NULL; 2024 2025 addr = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2026 if (save != addr) 2027 munmap(save, addr - save); 2028 if (addr + size != save + size + align) 2029 munmap(addr + size, save + align - addr); 2030 2031 addr_hint = addr + size; 2032 2033 return ((void *)addr); 2034 } 2035 2036 /* 2037 * _vmem_free() 2038 * 2039 * Free a chunk of memory allocated with _vmem_alloc() 2040 */ 2041 static void 2042 _vmem_free(void *ptr, size_t size) 2043 { 2044 munmap(ptr, size); 2045 } 2046 2047 /* 2048 * Panic on fatal conditions 2049 */ 2050 static void 2051 _mpanic(const char *ctl, ...) 2052 { 2053 va_list va; 2054 2055 if (malloc_panic == 0) { 2056 malloc_panic = 1; 2057 va_start(va, ctl); 2058 vfprintf(stderr, ctl, va); 2059 fprintf(stderr, "\n"); 2060 fflush(stderr); 2061 va_end(va); 2062 } 2063 abort(); 2064 } 2065 2066 __weak_reference(__aligned_alloc, aligned_alloc); 2067 __weak_reference(__malloc, malloc); 2068 __weak_reference(__calloc, calloc); 2069 __weak_reference(__posix_memalign, posix_memalign); 2070 __weak_reference(__realloc, realloc); 2071 __weak_reference(__free, free); 2072
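
/*
 * Optional standalone smoke test (an illustrative sketch, not part of
 * the original source).  The NMALLOC_TEST guard and the test body are
 * additions for demonstration; build the file as a normal program with
 * -DNMALLOC_TEST to exercise the public entry points directly.
 */
#ifdef NMALLOC_TEST
int
main(void)
{
	void *p;
	void *q = NULL;
	int rc;

	p = __malloc(0);		/* special non-NULL pointer */
	if (p == NULL)
		_mpanic("malloc(0) returned NULL");
	__free(p);

	p = __malloc(100);
	if (p != NULL)
		p = __realloc(p, 110);	/* may reuse the same chunk */
	__free(p);

	rc = __posix_memalign(&q, 64, 1000);
	if (rc != 0 || ((uintptr_t)q & 63) != 0)
		_mpanic("posix_memalign returned a misaligned result");
	__free(q);

	return(0);
}
#endif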