/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com> and by
 * Venkatesh Srinivas <me@endeavour.zapto.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * are nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them for free().  One and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *	+ power-of-2 sized allocations up to a page will be power-of-2
 *	  aligned.  Above that, power-of-2 sized allocations are
 *	  page-aligned.  Non power-of-2 sized allocations are aligned
 *	  the same as the chunk size for their zone.
 *	+ malloc(0) returns a special non-NULL value
 *	+ ability to allocate arbitrarily large chunks of memory
 *	+ realloc will reuse the passed pointer if possible, within the
 *	  limitations of the zone chunking.
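 *
 *	  As an illustrative consequence of the rules above: malloc(4096)
 *	  returns page-aligned memory, malloc(1024) returns 1024-byte
 *	  aligned memory, and malloc(300) is aligned to the 32-byte chunk
 *	  size of its zone.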
 *
 * Multithreaded enhancements for small allocations introduced August 2010.
 * These are in the spirit of 'libumem'.  See:
 *	Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
 *	slab allocator to many CPUs and arbitrary resources". In Proc. 2001
 *	USENIX Technical Conference. USENIX Association.
 *
 * Oversized allocations employ the BIGCACHE mechanic whereby large
 * allocations may be handed significantly larger buffers, allowing them
 * to avoid mmap/munmap operations even through significant realloc()s.
 * The excess space is only trimmed if too many large allocations have been
 * given this treatment.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.
 *
 * 'U'   / ['u']	Generate / do not generate utrace entries for ktrace(1)
 *			This will generate utrace events for all malloc,
 *			realloc, and free calls.  There are tools (mtrplay) to
 *			replay an allocation pattern or to graph heap structure
 *			(mtrgraph) which can interpret these logs.
 * 'Z'   / ['z']	Zero out / do not zero all allocations.
 *			Each new byte of memory allocated by malloc, realloc, or
 *			reallocf will be initialized to 0.  This is intended for
 *			debugging and will affect performance negatively.
 * 'H'   / ['h']	Pass a hint to the kernel about pages unused by the
 *			allocation functions.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */

#include "namespace.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <machine/atomic.h>
#include "un-namespace.h"

#include "libc_private.h"
#include "spinlock.h"

void __free(void *);
void *__malloc(size_t);
void *__calloc(size_t, size_t);
void *__realloc(void *, size_t);
void *__aligned_alloc(size_t, size_t);
int __posix_memalign(void **, size_t, size_t);

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	active;		/* bytes active */
	u_long	bytes;		/* bytes allocated */
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
	int32_t		z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone	*z_Next;	/* ZoneAry[] link if z_NFree non-zero */
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

#define FASTSLABREALLOC		0x02

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

/*
 * BIGCACHE caches oversized allocations.  Note that a linear search is
 * performed, so do not make the cache too large.
 *
 * BIGCACHE will garbage-collect excess space when the excess exceeds the
 * specified value.  A relatively large number should be used here because
 * garbage collection is expensive.
 */
#define BIGCACHE	16
#define BIGCACHE_MASK	(BIGCACHE - 1)
#define BIGCACHE_LIMIT	(1024 * 1024)		/* size limit */
#define BIGCACHE_EXCESS	(16 * 1024 * 1024)	/* garbage collect */

#define SAFLAG_ZERO	0x0001
#define SAFLAG_PASSIVE	0x0002

/*
 * Thread control
 */

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			} while (0)

/*
 * Magazines
 */

#define M_MAX_ROUNDS	64
#define M_ZONE_ROUNDS	64
#define M_LOW_ROUNDS	32
#define M_INIT_ROUNDS	8
#define M_BURST_FACTOR	8
#define M_BURST_NSCALE	2

#define M_BURST		0x0001
#define M_BURST_EARLY	0x0002

struct magazine {
	SLIST_ENTRY(magazine) nextmagazine;

	int		flags;
	int		capacity;	/* Max rounds in this magazine */
	int		rounds;		/* Current number of free rounds */
	int		burst_factor;	/* Number of blocks to prefill with */
	int		low_factor;	/* Free till low_factor from full mag */
	void		*objects[M_MAX_ROUNDS];
};

SLIST_HEAD(magazinelist, magazine);

static spinlock_t zone_mag_lock;
static spinlock_t depot_spinlock;
static struct magazine zone_magazine = {
	.flags = M_BURST | M_BURST_EARLY,
	.capacity = M_ZONE_ROUNDS,
	.rounds = 0,
	.burst_factor = M_BURST_FACTOR,
	.low_factor = M_LOW_ROUNDS
};

#define MAGAZINE_FULL(mp)	(mp->rounds == mp->capacity)
#define MAGAZINE_NOTFULL(mp)	(mp->rounds < mp->capacity)
#define MAGAZINE_EMPTY(mp)	(mp->rounds == 0)
#define MAGAZINE_NOTEMPTY(mp)	(mp->rounds != 0)

/*
 * Each thread will have a pair of magazines per size-class (NZONES).
 * The loaded magazine will support immediate allocations; the previous
 * magazine will either be full or empty and can be swapped at need.
 */
typedef struct magazine_pair {
	struct magazine	*loaded;
	struct magazine	*prev;
} magazine_pair;

/* A depot is a collection of magazines for a single zone. */
typedef struct magazine_depot {
	struct magazinelist full;
	struct magazinelist empty;
	spinlock_t	lock;
} magazine_depot;

typedef struct thr_mags {
	magazine_pair	mags[NZONES];
	struct magazine	*newmag;
	int		init;
} thr_mags;

/*
 * With this attribute set, do not require a function call for accessing
 * this variable when the code is compiled -fPIC.
 *
 * Must be empty for libc_rtld (similar to __thread).
 */
#ifdef __LIBC_RTLD
#define TLS_ATTRIBUTE
#else
#define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")))
#endif

static __thread thr_mags thread_mags TLS_ATTRIBUTE;
static pthread_key_t thread_mags_key;
static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT;
static magazine_depot depots[NZONES];

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static int opt_madvise = 0;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static struct slglobaldata SLGlobalData;
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static volatile void *bigcache_array[BIGCACHE];		/* atomic swap */
static volatile size_t bigcache_size_array[BIGCACHE];	/* SMP races ok */
static volatile int bigcache_index;			/* SMP races ok */
static int malloc_panic;
static size_t excess_alloc;				/* excess big allocs */

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr, int, bigalloc_t *);
static int _slabmemalign(void **memptr, size_t alignment, size_t size);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void *magazine_alloc(struct magazine *, int *);
static int magazine_free(struct magazine *, void *);
static void *mtmagazine_alloc(int zi);
static int mtmagazine_free(int zi, void *);
static void mtmagazine_init(void);
static void mtmagazine_destructor(void *);
static slzone_t zone_alloc(int flags);
static void zone_free(void *z);
static void _mpanic(const char *ctl, ...) __printflike(1, 2);
static void malloc_init(void) __constructor(101);

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}

static void
malloc_init(void)
{
	const char *p = NULL;

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':	opt_utrace = 0; break;
		case 'U':	opt_utrace = 1; break;
		case 'h':	opt_madvise = 0; break;
		case 'H':	opt_madvise = 1; break;
		case 'z':	g_malloc_flags = 0; break;
		case 'Z':	g_malloc_flags = SAFLAG_ZERO; break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
void
_nmalloc_thr_init(void)
{
	static int init_once;
	thr_mags *tp;

	/*
	 * Disallow mtmagazine operations until the mtmagazine is
	 * initialized.
	 */
	tp = &thread_mags;
	tp->init = -1;

	if (init_once == 0) {
		init_once = 1;
		_pthread_once(&thread_mags_once, mtmagazine_init);
	}
	_pthread_setspecific(thread_mags_key, tp);
	tp->init = 1;
}

void
_nmalloc_thr_prepfork(void)
{
	if (__isthreaded) {
		_SPINLOCK(&zone_mag_lock);
		_SPINLOCK(&depot_spinlock);
	}
}

void
_nmalloc_thr_parentfork(void)
{
	if (__isthreaded) {
		_SPINUNLOCK(&depot_spinlock);
		_SPINUNLOCK(&zone_mag_lock);
	}
}

void
_nmalloc_thr_childfork(void)
{
	if (__isthreaded) {
		_SPINUNLOCK(&depot_spinlock);
		_SPINUNLOCK(&zone_mag_lock);
	}
}

/*
 * Thread locks.
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINLOCK(&slgd->Spinlock);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINUNLOCK(&slgd->Spinlock);
}

static __inline void
depot_lock(magazine_depot *dp __unused)
{
	if (__isthreaded)
		_SPINLOCK(&depot_spinlock);
#if 0
	if (__isthreaded)
		_SPINLOCK(&dp->lock);
#endif
}

static __inline void
depot_unlock(magazine_depot *dp __unused)
{
	if (__isthreaded)
		_SPINUNLOCK(&depot_spinlock);
#if 0
	if (__isthreaded)
		_SPINUNLOCK(&dp->lock);
#endif
}

static __inline void
zone_magazine_lock(void)
{
	if (__isthreaded)
		_SPINLOCK(&zone_mag_lock);
}

static __inline void
zone_magazine_unlock(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&zone_mag_lock);
}

static __inline void
swap_mags(magazine_pair *mp)
{
	struct magazine *tmp;
	tmp = mp->loaded;
	mp->loaded = mp->prev;
	mp->prev = tmp;
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
	int hv;

	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
	      ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

	return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (__isthreaded)
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	return(bigp);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (*bigp == NULL)
		return(NULL);
	if (__isthreaded) {
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	}
	return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
	int hv;

	if (__isthreaded) {
		hv = _bigalloc_hash(ptr);
		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
	}
}

/*
 * Find a bigcache entry that might work for the allocation.  SMP races are
 * ok here except for the swap (that is, it is ok if bigcache_size_array[i]
 * is wrong or if a NULL or too-small big is returned).
 *
 * Generally speaking it is ok to find a large entry even if the bytes
 * requested are relatively small (but still oversized), because we really
 * don't know *what* the application is going to do with the buffer.
 */
static __inline
bigalloc_t
bigcache_find_alloc(size_t bytes)
{
	bigalloc_t big = NULL;
	size_t test;
	int i;

	for (i = 0; i < BIGCACHE; ++i) {
		test = bigcache_size_array[i];
		if (bytes <= test) {
			bigcache_size_array[i] = 0;
			big = atomic_swap_ptr(&bigcache_array[i], NULL);
			break;
		}
	}
	return big;
}

/*
 * Free a bigcache entry, possibly returning one that the caller really must
 * free.  This is used to cache recent oversized memory blocks.  Only
 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try
 * to collect the biggest ones we can that are under the limit.
 */
static __inline
bigalloc_t
bigcache_find_free(bigalloc_t big)
{
	int i;
	int j;
	int b;

	b = ++bigcache_index;
	for (i = 0; i < BIGCACHE; ++i) {
		j = (b + i) & BIGCACHE_MASK;
		if (bigcache_size_array[j] < big->bytes) {
			bigcache_size_array[j] = big->bytes;
			big = atomic_swap_ptr(&bigcache_array[j], big);
			break;
		}
	}
	return big;
}

static __inline
void
handle_excess_big(void)
{
	int i;
	bigalloc_t big;
	bigalloc_t *bigp;

	if (excess_alloc <= BIGCACHE_EXCESS)
		return;

	for (i = 0; i < BIGHSIZE; ++i) {
		bigp = &bigalloc_array[i];
		if (*bigp == NULL)
			continue;
		if (__isthreaded)
			_SPINLOCK(&bigspin_array[i & BIGXMASK]);
		for (big = *bigp; big; big = big->next) {
			if (big->active < big->bytes) {
				MASSERT((big->active & PAGE_MASK) == 0);
				MASSERT((big->bytes & PAGE_MASK) == 0);
				munmap((char *)big->base + big->active,
				       big->bytes - big->active);
				atomic_add_long(&excess_alloc,
						big->active - big->bytes);
				big->bytes = big->active;
			}
		}
		if (__isthreaded)
			_SPINUNLOCK(&bigspin_array[i & BIGXMASK]);
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */

	/*
	 * This used to be 8-byte chunks and 16 zones for n < 128.
	 * However some instructions may require 16-byte alignment
	 * (aka SIMD) and programs might not request an aligned size
	 * (aka GCC-7), so change this as follows:
	 *
	 * 0-15 bytes	8-byte alignment in two zones	(0-1)
	 * 16-127 bytes	16-byte alignment in eight zones (3-10)
	 * zone index 2 and 11-15 are currently unused.
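	 *
	 * Worked examples (illustrative, derived from the rules below):
	 * a 100-byte request rounds up to 112 with 16-byte chunking and
	 * maps to zone 112/16 + 2 == 9; a 3000-byte request rounds up to
	 * 3072 with 256-byte chunking and maps to zone 3072/256 + 39 == 51.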
	 */
	if (n < 16) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8 - 1);		/* 8 byte chunks, 2 zones */
						/* zones 0,1, zone 2 is unused */
	}
	if (n < 128) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 2);		/* 16 byte chunks, 8 zones */
						/* zones 3-10, zones 11-15 unused */
	}
	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 7);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / 32 + 15);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / 64 + 23);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / 128 + 31);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / 256 + 39);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / 512 + 47);
	}
#if ZALLOC_ZONE_LIMIT > 8192
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / 1024 + 55);
	}
#endif
#if ZALLOC_ZONE_LIMIT > 16384
	if (n < 32768) {
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / 2048 + 63);
	}
#endif
	_mpanic("Unexpected byte count %zu", n);
	return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
__malloc(size_t size)
{
	void *ptr;

	ptr = _slaballoc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

#define MUL_NO_OVERFLOW	(1UL << (sizeof(size_t) * 4))

/*
 * calloc() - call internal slab allocator
 */
void *
__calloc(size_t number, size_t size)
{
	void *ptr;

	if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
	     number > 0 && SIZE_MAX / number < size) {
		errno = ENOMEM;
		return(NULL);
	}

	ptr = _slaballoc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
__realloc(void *ptr, size_t size)
{
	void *ret;
	ret = _slabrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}

/*
 * aligned_alloc()
 *
 * Allocate (size) bytes with an alignment of (alignment).
 */
void *
__aligned_alloc(size_t alignment, size_t size)
{
	void *ptr;
	int rc;

	ptr = NULL;
	rc = _slabmemalign(&ptr, alignment, size);
	if (rc)
		errno = rc;

	return (ptr);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 */
int
__posix_memalign(void **memptr, size_t alignment, size_t size)
{
	int rc;

	/*
	 * OpenGroup spec issue 6 check
	 */
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	rc = _slabmemalign(memptr, alignment, size);

	return (rc);
}
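
/*
 * Usage sketch (illustrative only; not compiled into the library).  It
 * shows how a caller obtains aligned memory through the wrappers above;
 * the function name, sizes, and alignments are arbitrary examples.
 */
#if 0
static void
memalign_example(void)
{
	void *p;

	if (posix_memalign(&p, 64, 1000) == 0) {
		/* p is 64-byte aligned and is released with free() */
		free(p);
	}
	p = aligned_alloc(4096, 8192);	/* page-aligned, two pages */
	if (p != NULL)
		free(p);
}
#endif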

/*
 * The slab allocator will allocate on power-of-2 boundaries up to at least
 * PAGE_SIZE.  We use the zoneindex mechanic to find a zone matching the
 * requirements, and _vmem_alloc() otherwise.
 */
static int
_slabmemalign(void **memptr, size_t alignment, size_t size)
{
	bigalloc_t *bigp;
	bigalloc_t big;
	size_t chunking;
	int zi __unused;

	if (alignment < 1) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * Our zone mechanism guarantees same-sized alignment for any
	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
	 * we can just call _slaballoc() and be done.  We round size up
	 * to the nearest alignment boundary to improve our odds of
	 * it becoming a power-of-2 if it wasn't before.
	 */
	if (size <= alignment)
		size = alignment;
	else
		size = (size + alignment - 1) & ~(size_t)(alignment - 1);

	/*
	 * If we have overflowed above when rounding to the nearest alignment
	 * boundary, just return ENOMEM; size should be == N * sizeof(void *).
	 *
	 * Power-of-2 allocations up to 8KB will be aligned to the allocation
	 * size and _slaballoc() can simply be used.  Please see the 'Align
	 * the storage in the zone based on the chunking' note in
	 * _slaballoc(), which has a special case for powers of 2.
	 */
	if (size == 0)
		return(ENOMEM);

	if (size <= PAGE_SIZE*2 && (size | (size - 1)) + 1 == (size << 1)) {
		*memptr = _slaballoc(size, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * Otherwise locate a zone with a chunking that matches
	 * the requested alignment, within reason.  Consider two cases:
	 *
	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
	 *     we find will be the best fit because the chunking will be
	 *     greater or equal to the alignment.
	 *
	 * (2) A 513 allocation on a 256-byte alignment.  In this case
	 *     the first zoneindex we find will be for 576 byte allocations
	 *     with a chunking of 64, which is not sufficient.  To fix this
	 *     we simply find the nearest power-of-2 >= size and use the
	 *     same side-effect of _slaballoc() which guarantees
	 *     same-alignment on a power-of-2 allocation.
	 */
	if (size < PAGE_SIZE) {
		zi = zoneindex(&size, &chunking);
		if (chunking >= alignment) {
			*memptr = _slaballoc(size, 0);
			return(*memptr ? 0 : ENOMEM);
		}
		if (size >= 1024)
			alignment = 1024;
		if (size >= 16384)
			alignment = 16384;
		while (alignment < size)
			alignment <<= 1;
		*memptr = _slaballoc(alignment, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * If the slab allocator cannot handle it use vmem_alloc().
	 *
	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (size < alignment)
		size = alignment;
	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
	if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) {
		big = bigcache_find_alloc(size);
		if (big && big->bytes < size) {
			_slabfree(big->base, FASTSLABREALLOC, &big);
			big = NULL;
		}
		if (big) {
			*memptr = big->base;
			big->active = size;
			if (big->active < big->bytes) {
				atomic_add_long(&excess_alloc,
						big->bytes - big->active);
			}
			bigp = bigalloc_lock(*memptr);
			big->next = *bigp;
			*bigp = big;
			bigalloc_unlock(*memptr);
			handle_excess_big();
			return(0);
		}
	}
	*memptr = _vmem_alloc(size, alignment, 0);
	if (*memptr == NULL)
		return(ENOMEM);

	big = _slaballoc(sizeof(struct bigalloc), 0);
	if (big == NULL) {
		_vmem_free(*memptr, size);
		*memptr = NULL;
		return(ENOMEM);
	}
	bigp = bigalloc_lock(*memptr);
	big->base = *memptr;
	big->active = size;
	big->bytes = size;		/* no excess */
	big->next = *bigp;
	*bigp = big;
	bigalloc_unlock(*memptr);

	return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
__free(void *ptr)
{
	UTRACE(ptr, 0, 0);
	_slabfree(ptr, 0, NULL);
}

/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.  If the request is too large,
 * or if it is page-aligned beyond a certain size, we fall back to the
 * KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
	slzone_t z;
	slchunk_t chunk;
	slglobaldata_t slgd;
	size_t chunking;
	int zi;
	int off;
	void *obj;

	/*
	 * Handle the degenerate size == 0 case.  Yes, this does happen.
	 * Return a special pointer.  This is to maintain compatibility with
	 * the original malloc implementation.  Certain devices, such as the
	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
	 * also realloc() later on.  Joy.
	 */
	if (size == 0)
		size = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Handle large allocations directly.  There should not be very many
	 * of these so performance is not a big issue.
	 *
	 * The backend allocator is pretty nasty on a SMP system.  Use the
	 * slab allocator for one and two page-sized chunks even though we
	 * lose some efficiency.
	 *
	 * NOTE: Please see _slabmemalign(), which assumes that power-of-2
	 *	 allocations of PAGE_SIZE and PAGE_SIZE*2 can use
	 *	 _slaballoc() and be aligned to the same.  The zone cache
	 *	 can be used for this case, bigalloc does not have to be
	 *	 used.
	 */
	if (size >= ZoneLimit ||
	    ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
		bigalloc_t big;
		bigalloc_t *bigp;

		/*
		 * Page-align and cache-color in case of virtually indexed
		 * physically tagged L1 caches (aka SandyBridge).  No sweat
		 * otherwise, so just do it.
		 *
		 * (don't count as excess).
		 */
		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;

		/*
		 * If we have overflowed above when rounding to the page
		 * boundary, something has passed us (size_t)[-PAGE_MASK..-1]
		 * so just return NULL, size at this point should be >= 0.
		 */
		if (size == 0)
			return (NULL);

		if ((size & (PAGE_SIZE * 2 - 1)) == 0)
			size += PAGE_SIZE;

		/*
		 * Try to reuse a cached big block to avoid mmap'ing.  If it
		 * turns out not to fit our requirements we throw it away
		 * and allocate normally.
		 */
		big = NULL;
		if (size <= BIGCACHE_LIMIT) {
			big = bigcache_find_alloc(size);
			if (big && big->bytes < size) {
				_slabfree(big->base, FASTSLABREALLOC, &big);
				big = NULL;
			}
		}
		if (big) {
			chunk = big->base;
			if (flags & SAFLAG_ZERO)
				bzero(chunk, size);
		} else {
			chunk = _vmem_alloc(size, PAGE_SIZE, flags);
			if (chunk == NULL)
				return(NULL);

			big = _slaballoc(sizeof(struct bigalloc), 0);
			if (big == NULL) {
				_vmem_free(chunk, size);
				return(NULL);
			}
			big->base = chunk;
			big->bytes = size;
		}
		big->active = size;

		bigp = bigalloc_lock(chunk);
		if (big->active < big->bytes) {
			atomic_add_long(&excess_alloc,
					big->bytes - big->active);
		}
		big->next = *bigp;
		*bigp = big;
		bigalloc_unlock(chunk);
		handle_excess_big();

		return(chunk);
	}

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);

	obj = mtmagazine_alloc(zi);
	if (obj != NULL) {
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	slgd = &SLGlobalData;
	slgd_lock(slgd);

	/*
	 * Attempt to allocate out of an existing zone.  If all zones are
	 * exhausted pull one off the free list or allocate a new one.
	 */
	if ((z = slgd->ZoneAry[zi]) == NULL) {
		z = zone_alloc(flags);
		if (z == NULL)
			goto fail;

		/*
		 * How big is the base structure?
		 */
		off = sizeof(struct slzone);

		/*
		 * Align the storage in the zone based on the chunking.
		 *
		 * Guarantee power-of-2 alignment for power-of-2-sized
		 * chunks.  Otherwise align based on the chunking size
		 * (typically 8 or 16 bytes for small allocations).
		 *
		 * NOTE: Allocations >= ZoneLimit are governed by the
		 * bigalloc code and typically only guarantee page-alignment.
		 *
		 * Set initial conditions for UIndex near the zone header
		 * to reduce unnecessary page faults, vs semi-randomization
		 * to improve L1 cache saturation.
		 *
		 * NOTE: Please see _slabmemalign(), which assumes that
		 *	 power-of-2 allocations of PAGE_SIZE and PAGE_SIZE*2
		 *	 can use _slaballoc() and be aligned to the same.
		 *	 The zone cache can be used for this case, bigalloc
		 *	 does not have to be used.
		 *
		 *	 ALL power-of-2 requests that fall through to this
		 *	 code use this rule (conditionals above limit this
		 *	 to <= PAGE_SIZE*2).
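		 *
		 *	 Illustrative example: for the 1024-byte zone the
		 *	 offset below becomes roundup2(sizeof(struct slzone),
		 *	 1024), so every chunk at z_BasePtr + N*1024 lands on
		 *	 a 1024-byte boundary within the 64KB-aligned zone.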
		 */
		if ((size | (size - 1)) + 1 == (size << 1))
			off = roundup2(off, size);
		else
			off = roundup2(off, chunking);
		z->z_Magic = ZALLOC_SLAB_MAGIC;
		z->z_ZoneIndex = zi;
		z->z_NMax = (ZoneSize - off) / size;
		z->z_NFree = z->z_NMax;
		z->z_BasePtr = (char *)z + off;
		z->z_UIndex = z->z_UEndIndex = 0;
		z->z_ChunkSize = size;
		z->z_FirstFreePg = ZonePageCount;
		z->z_Next = slgd->ZoneAry[zi];
		slgd->ZoneAry[zi] = z;
		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
			flags &= ~SAFLAG_ZERO;	/* already zero'd */
			flags |= SAFLAG_PASSIVE;
		}

		/*
		 * Slide the base index for initial allocations out of the
		 * next zone we create so we do not over-weight the lower
		 * part of the cpu memory caches.
		 */
		slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
				  & (ZALLOC_MAX_ZONE_SIZE - 1);
	}

	/*
	 * Ok, we have a zone from which at least one chunk is available.
	 *
	 * Remove us from the ZoneAry[] when we become empty
	 */
	MASSERT(z->z_NFree > 0);

	if (--z->z_NFree == 0) {
		slgd->ZoneAry[zi] = z->z_Next;
		z->z_Next = NULL;
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < ZonePageCount) {
		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
			MASSERT((uintptr_t)chunk & ZoneMask);
			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
			goto done;
		}
		++z->z_FirstFreePg;
	}

	/*
	 * No chunks are available but NFree said we had some memory,
	 * so it must be available in the never-before-used-memory
	 * area governed by UIndex.  The consequences are very
	 * serious if our zone got corrupted so we use an explicit
	 * panic rather than a KASSERT.
	 */
	chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

	if (++z->z_UIndex == z->z_NMax)
		z->z_UIndex = 0;
	if (z->z_UIndex == z->z_UEndIndex) {
		if (z->z_NFree != 0)
			_mpanic("slaballoc: corrupted zone");
	}

	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
		flags &= ~SAFLAG_ZERO;
		flags |= SAFLAG_PASSIVE;
	}

done:
	slgd_unlock(slgd);
	if (flags & SAFLAG_ZERO)
		bzero(chunk, size);
	return(chunk);
fail:
	slgd_unlock(slgd);
	return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
	bigalloc_t *bigp;
	void *nptr;
	slzone_t z;
	size_t chunking;

	if (ptr == NULL) {
		return(_slaballoc(size, 0));
	}

	if (size == 0)
		size = 1;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		bigalloc_t big;
		size_t bigbytes;

		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
				bigbytes = big->bytes;

				/*
				 * If it already fits determine if it makes
				 * sense to shrink/reallocate.  Try to optimize
				 * programs which stupidly make incremental
				 * reallocations larger or smaller by scaling
				 * the allocation.  Also deal with potential
				 * coloring.
				 */
				if (size >= (bigbytes >> 1) &&
				    size <= bigbytes) {
					if (big->active != size) {
						atomic_add_long(&excess_alloc,
								big->active -
								size);
					}
					big->active = size;
					bigalloc_unlock(ptr);
					return(ptr);
				}

				/*
				 * For large reallocations, allocate more space
				 * than we need to try to avoid excessive
				 * reallocations later on.
				 */
				chunking = size + (size >> 3);
				chunking = (chunking + PAGE_MASK) &
					   ~(size_t)PAGE_MASK;

				/*
				 * Try to allocate adjacently in case the
				 * program is idiotically realloc()ing a
				 * huge memory block just slightly bigger.
				 * (llvm's llc tends to do this a lot).
				 *
				 * (MAP_TRYFIXED forces mmap to fail if there
				 * is already something at the address).
				 */
				if (chunking > bigbytes) {
					char *addr;
					int errno_save = errno;

					addr = mmap((char *)ptr + bigbytes,
						    chunking - bigbytes,
						    PROT_READ|PROT_WRITE,
						    MAP_PRIVATE|MAP_ANON|
						    MAP_TRYFIXED,
						    -1, 0);
					errno = errno_save;
					if (addr == (char *)ptr + bigbytes) {
						atomic_add_long(&excess_alloc,
								big->active -
								big->bytes +
								chunking -
								size);
						big->bytes = chunking;
						big->active = size;
						bigalloc_unlock(ptr);

						return(ptr);
					}
					MASSERT((void *)addr == MAP_FAILED);
				}

				/*
				 * Failed, unlink big and allocate fresh.
				 * (note that we have to leave (big) intact
				 * in case the slaballoc fails).
				 */
				*bigp = big->next;
				bigalloc_unlock(ptr);
				if ((nptr = _slaballoc(size, 0)) == NULL) {
					/* Relink block */
					bigp = bigalloc_lock(ptr);
					big->next = *bigp;
					*bigp = big;
					bigalloc_unlock(ptr);
					return(NULL);
				}
				if (size > bigbytes)
					size = bigbytes;
				bcopy(ptr, nptr, size);
				atomic_add_long(&excess_alloc, big->active -
							       big->bytes);
				_slabfree(ptr, FASTSLABREALLOC, &big);

				return(nptr);
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
		handle_excess_big();
	}

	/*
	 * Get the original allocation's zone.  If the new request winds
	 * up using the same chunk size we do not have to do anything.
	 *
	 * NOTE: We don't have to lock the globaldata here, the fields we
	 * access here will not change at least as long as we have control
	 * over the allocation.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	/*
	 * Use zoneindex() to chunk-align the new size, as long as the
	 * new size is not too large.
	 */
	if (size < ZoneLimit) {
		zoneindex(&size, &chunking);
		if (z->z_ChunkSize == size) {
			return(ptr);
		}
	}

	/*
	 * Allocate memory for the new request size and copy as appropriate.
	 */
	if ((nptr = _slaballoc(size, 0)) != NULL) {
		if (size > z->z_ChunkSize)
			size = z->z_ChunkSize;
		bcopy(ptr, nptr, size);
		_slabfree(ptr, 0, NULL);
	}

	return(nptr);
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * flags:
 *	FASTSLABREALLOC		Fast call from realloc, *rbigp already
 *				unlinked.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr, int flags, bigalloc_t *rbigp)
{
	slzone_t z;
	slchunk_t chunk;
	bigalloc_t big;
	bigalloc_t *bigp;
	slglobaldata_t slgd;
	size_t size;
	int zi;
	int pgno;

	/* Fast realloc path for big allocations */
	if (flags & FASTSLABREALLOC) {
		big = *rbigp;
		goto fastslabrealloc;
	}

	/*
	 * Handle NULL frees and special 0-byte allocations
	 */
	if (ptr == NULL)
		return;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				*bigp = big->next;
				atomic_add_long(&excess_alloc, big->active -
							       big->bytes);
				bigalloc_unlock(ptr);

				/*
				 * Try to stash the block we are freeing,
				 * potentially receiving another block in
				 * return which must be freed.
				 */
fastslabrealloc:
				if (big->bytes <= BIGCACHE_LIMIT) {
					big = bigcache_find_free(big);
					if (big == NULL)
						return;
				}
				ptr = big->base;	/* reload */
				size = big->bytes;
				_slabfree(big, 0, NULL);
				_vmem_free(ptr, size);
				return;
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
		handle_excess_big();
	}

	/*
	 * Zone case.  Figure out the zone based on the fact that it is
	 * ZoneSize aligned.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	size = z->z_ChunkSize;
	zi = z->z_ZoneIndex;

	if (g_malloc_flags & SAFLAG_ZERO)
		bzero(ptr, size);

	if (mtmagazine_free(zi, ptr) == 0)
		return;

	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
	chunk = ptr;
	slgd = &SLGlobalData;
	slgd_lock(slgd);

	/*
	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
	 * z_FirstFreePg.
	 */
	chunk->c_Next = z->z_PageAry[pgno];
	z->z_PageAry[pgno] = chunk;
	if (z->z_FirstFreePg > pgno)
		z->z_FirstFreePg = pgno;

	/*
	 * Bump the number of free chunks.  If it becomes non-zero the zone
	 * must be added back onto the appropriate list.
	 */
	if (z->z_NFree++ == 0) {
		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
		slgd->ZoneAry[z->z_ZoneIndex] = z;
	}

	/*
	 * If the zone becomes totally free then release it.
	 */
	if (z->z_NFree == z->z_NMax) {
		slzone_t *pz;

		pz = &slgd->ZoneAry[z->z_ZoneIndex];
		while (z != *pz)
			pz = &(*pz)->z_Next;
		*pz = z->z_Next;
		z->z_Magic = -1;
		z->z_Next = NULL;
		zone_free(z);
		/* slgd lock released */
		return;
	}
	slgd_unlock(slgd);
}

/*
 * Allocate and return a magazine.  NULL is returned and *burst is adjusted
 * if the magazine is empty.
 */
static __inline void *
magazine_alloc(struct magazine *mp, int *burst)
{
	void *obj;

	if (mp == NULL)
		return(NULL);
	if (MAGAZINE_NOTEMPTY(mp)) {
		obj = mp->objects[--mp->rounds];
		return(obj);
	}

	/*
	 * Return burst factor to caller along with NULL
	 */
	if ((mp->flags & M_BURST) && (burst != NULL)) {
		*burst = mp->burst_factor;
	}
	/* Reduce burst factor by NSCALE; if it hits 1, disable BURST */
	if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) &&
	    (burst != NULL)) {
		mp->burst_factor -= M_BURST_NSCALE;
		if (mp->burst_factor <= 1) {
			mp->burst_factor = 1;
			mp->flags &= ~(M_BURST);
			mp->flags &= ~(M_BURST_EARLY);
		}
	}
	return (NULL);
}

static __inline int
magazine_free(struct magazine *mp, void *p)
{
	if (mp != NULL && MAGAZINE_NOTFULL(mp)) {
		mp->objects[mp->rounds++] = p;
		return 0;
	}

	return -1;
}

static void *
mtmagazine_alloc(int zi)
{
	thr_mags *tp;
	struct magazine *mp, *emptymag;
	magazine_depot *d;
	void *obj;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(NULL);

	/*
	 * Primary per-thread allocation loop
	 */
	for (;;) {
		/*
		 * If the loaded magazine has rounds, allocate and return
		 */
		mp = tp->mags[zi].loaded;
		obj = magazine_alloc(mp, NULL);
		if (obj)
			break;

		/*
		 * If the prev magazine is full, swap with the loaded
		 * magazine and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_FULL(mp)) {
			MASSERT(mp->rounds != 0);
			swap_mags(&tp->mags[zi]);	/* prev now empty */
			continue;
		}

		/*
		 * Try to get a full magazine from the depot.  Cycle
		 * through depot(full)->loaded->prev->depot(empty).
		 * Retry if a full magazine was available from the depot.
		 *
		 * Return NULL (caller will fall through) if no magazines
		 * can be found anywhere.
		 */
		d = &depots[zi];
		depot_lock(d);
		emptymag = tp->mags[zi].prev;
		if (emptymag)
			SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine);
		tp->mags[zi].prev = tp->mags[zi].loaded;
		mp = SLIST_FIRST(&d->full);	/* loaded magazine */
		tp->mags[zi].loaded = mp;
		if (mp) {
			SLIST_REMOVE_HEAD(&d->full, nextmagazine);
			MASSERT(MAGAZINE_NOTEMPTY(mp));
			depot_unlock(d);
			continue;
		}
		depot_unlock(d);
		break;
	}

	return (obj);
}

static int
mtmagazine_free(int zi, void *ptr)
{
	thr_mags *tp;
	struct magazine *mp, *loadedmag;
	magazine_depot *d;
	int rc = -1;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(-1);

	/*
	 * Primary per-thread freeing loop
	 */
	for (;;) {
		/*
		 * Make sure a new magazine is available in case we have
		 * to use it.  Staging the newmag allows us to avoid
		 * some locking/reentrancy complexity.
		 *
		 * Temporarily disable the per-thread caches for this
		 * allocation to avoid reentrancy and/or to avoid a
		 * stack overflow if the [zi] happens to be the same that
		 * would be used to allocate the new magazine.
		 */
		if (tp->newmag == NULL) {
			tp->init = -1;
			tp->newmag = _slaballoc(sizeof(struct magazine),
						SAFLAG_ZERO);
			tp->init = 1;
			if (tp->newmag == NULL) {
				rc = -1;
				break;
			}
		}

		/*
		 * If the loaded magazine has space, free directly to it
		 */
		rc = magazine_free(tp->mags[zi].loaded, ptr);
		if (rc == 0)
			break;

		/*
		 * If the prev magazine is empty, swap with the loaded
		 * magazine and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_EMPTY(mp)) {
			MASSERT(mp->rounds == 0);
			swap_mags(&tp->mags[zi]);	/* prev now full */
			continue;
		}

		/*
		 * Try to get an empty magazine from the depot.  Cycle
		 * through depot(empty)->loaded->prev->depot(full).
		 * Retry if an empty magazine was available from the depot.
		 */
		d = &depots[zi];
		depot_lock(d);

		if ((loadedmag = tp->mags[zi].prev) != NULL)
			SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine);
		tp->mags[zi].prev = tp->mags[zi].loaded;
		mp = SLIST_FIRST(&d->empty);
		if (mp) {
			tp->mags[zi].loaded = mp;
			SLIST_REMOVE_HEAD(&d->empty, nextmagazine);
			MASSERT(MAGAZINE_NOTFULL(mp));
		} else {
			mp = tp->newmag;
			tp->newmag = NULL;
			mp->capacity = M_MAX_ROUNDS;
			mp->rounds = 0;
			mp->flags = 0;
			tp->mags[zi].loaded = mp;
		}
		depot_unlock(d);
	}

	return rc;
}

static void
mtmagazine_init(void)
{
	int error;

	error = _pthread_key_create(&thread_mags_key, mtmagazine_destructor);
	if (error)
		abort();
}

/*
 * This function is only used by the thread exit destructor
 */
static void
mtmagazine_drain(struct magazine *mp)
{
	void *obj;

	while (MAGAZINE_NOTEMPTY(mp)) {
		obj = magazine_alloc(mp, NULL);
		_slabfree(obj, 0, NULL);
	}
}

/*
 * mtmagazine_destructor()
 *
 * When a thread exits, we reclaim all its resources; all its magazines are
 * drained and the structures are freed.
 *
 * WARNING!  The destructor can be called multiple times if the larger user
 *	     program has its own destructors which run after ours which
 *	     allocate or free memory.
 */
static void
mtmagazine_destructor(void *thrp)
{
	thr_mags *tp = thrp;
	struct magazine *mp;
	int i;

	/*
	 * Prevent further use of mtmagazines while we are destructing
	 * them, as well as for any destructors which are run after us
	 * prior to the thread actually being destroyed.
	 */
	tp->init = -1;

	for (i = 0; i < NZONES; i++) {
		mp = tp->mags[i].loaded;
		tp->mags[i].loaded = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}

		mp = tp->mags[i].prev;
		tp->mags[i].prev = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}
	}

	if (tp->newmag) {
		mp = tp->newmag;
		tp->newmag = NULL;
		_slabfree(mp, 0, NULL);
	}
}

/*
 * zone_alloc()
 *
 * Attempt to allocate a zone from the zone magazine; the zone magazine has
 * M_BURST_EARLY enabled, so honor the burst request from the magazine.
 */
static slzone_t
zone_alloc(int flags)
{
	slglobaldata_t slgd = &SLGlobalData;
	int burst = 1;
	int i, j;
	slzone_t z;

	zone_magazine_lock();
	slgd_unlock(slgd);

	z = magazine_alloc(&zone_magazine, &burst);
	if (z == NULL && burst == 1) {
		zone_magazine_unlock();
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
	} else if (z == NULL) {
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
		if (z) {
			for (i = 1; i < burst; i++) {
				j = magazine_free(&zone_magazine,
						  (char *) z + (ZoneSize * i));
				MASSERT(j == 0);
			}
		}
		zone_magazine_unlock();
	} else {
		z->z_Flags |= SLZF_UNOTZEROD;
		zone_magazine_unlock();
	}
	slgd_lock(slgd);
	return z;
}

/*
 * zone_free()
 *
 * Release a zone and unlock the slgd lock.
 */
static void
zone_free(void *z)
{
	slglobaldata_t slgd = &SLGlobalData;
	void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {};
	int i, j;

	zone_magazine_lock();
	slgd_unlock(slgd);

	bzero(z, sizeof(struct slzone));

	if (opt_madvise)
		madvise(z, ZoneSize, MADV_FREE);

	i = magazine_free(&zone_magazine, z);

	/*
	 * If we failed to free, collect excess magazines; release the zone
	 * magazine lock, and then free to the system via _vmem_free.
	 * Re-enable BURST mode for the magazine.
	 */
	if (i == -1) {
		j = zone_magazine.rounds - zone_magazine.low_factor;
		for (i = 0; i < j; i++) {
			excess[i] = magazine_alloc(&zone_magazine, NULL);
			MASSERT(excess[i] != NULL);
		}

		zone_magazine_unlock();

		for (i = 0; i < j; i++)
			_vmem_free(excess[i], ZoneSize);

		_vmem_free(z, ZoneSize);
	} else {
		zone_magazine_unlock();
	}
}

/*
 * _vmem_alloc()
 *
 * Directly map memory in PAGE_SIZE'd chunks with the specified
 * alignment.
 *
 * Alignment must be a multiple of PAGE_SIZE.
 *
 * Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
	char *addr;
	char *save;
	size_t excess;

	/*
	 * Map anonymous private memory.
	 */
	addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (addr == MAP_FAILED)
		return(NULL);

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (align - 1);

	if (excess) {
		excess = align - excess;
		save = addr;

		munmap(save + excess, size - excess);
		addr = _vmem_alloc(size, align, flags);
		munmap(save, excess);
	}
	return((void *)addr);
}

/*
 * _vmem_free()
 *
 * Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}

__weak_reference(__aligned_alloc, aligned_alloc);
__weak_reference(__malloc, malloc);
__weak_reference(__calloc, calloc);
__weak_reference(__posix_memalign, posix_memalign);
__weak_reference(__realloc, realloc);
__weak_reference(__free, free);
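
/*
 * Minimal smoke-test sketch (illustrative only; not part of the library).
 * It assumes the file is compiled standalone with a hypothetical
 * NMALLOC_TEST macro defined; both the macro and the test below are
 * examples, not an official test harness.
 */
#ifdef NMALLOC_TEST
int
main(void)
{
	void *p, *q;

	p = malloc(100);		/* small, slab-backed allocation */
	q = realloc(p, 64 * 1024);	/* >= ZoneLimit, exercises bigalloc */
	free(q != NULL ? q : p);
	free(malloc(0));		/* malloc(0) returns a usable non-NULL pointer */
	return 0;
}
#endif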