/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com> and by
 * Venkatesh Srinivas <me@endeavour.zapto.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * is nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them for free().  One- and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that power-of-2 sized allocations are page-aligned.  Non
 *	power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
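 *
 *    A few illustrative consequences of the rules above (these are not
 *    additional guarantees, just examples):
 *
 *	malloc(256)  - power-of-2 size <= a page, result is 256-byte aligned.
 *	malloc(8192) - power-of-2 size above a page, result is page-aligned.
 *	malloc(100)  - non-power-of-2 size, aligned only to its zone's
 *		       chunk size.
 *	malloc(0)    - returns a distinct non-NULL pointer which may later
 *		       be passed to realloc() and free().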
 *
 * Multithreaded enhancements for small allocations introduced August 2010.
 * These are in the spirit of 'libumem'.  See:
 *	Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
 *	slab allocator to many CPUs and arbitrary resources". In Proc. 2001
 *	USENIX Technical Conference. USENIX Association.
 *
 * Oversized allocations employ the BIGCACHE mechanic whereby large
 * allocations may be handed significantly larger buffers, allowing them
 * to avoid mmap/munmap operations even through significant realloc()s.
 * The excess space is only trimmed if too many large allocations have been
 * given this treatment.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.
 *
 * 'U' / ['u']	Generate / do not generate utrace entries for ktrace(1).
 *		This will generate utrace events for all malloc,
 *		realloc, and free calls.  There are tools (mtrplay) to
 *		replay an allocation pattern or to graph heap structure
 *		(mtrgraph) which can interpret these logs.
 * 'Z' / ['z']	Zero out / do not zero all allocations.
 *		Each new byte of memory allocated by malloc, realloc, or
 *		reallocf will be initialized to 0.  This is intended for
 *		debugging and will affect performance negatively.
 * 'H' / ['h']	Pass a hint to the kernel about pages unused by the
 *		allocation functions.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */

#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <machine/atomic.h>

#include "spinlock.h"
#include "un-namespace.h"

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	active;		/* bytes active */
	u_long	bytes;		/* bytes allocated */
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
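 *
 * Because every zone is ZALLOC_ZONE_SIZE bytes and allocated with
 * ZALLOC_ZONE_SIZE alignment, the header for any slab-managed pointer
 * can be recovered by masking, as done in _slabfree() and _slabrealloc():
 *
 *	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
 *
 * after which z->z_Magic is checked against ZALLOC_SLAB_MAGIC.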
 */
struct slglobaldata;

typedef struct slzone {
	int32_t		z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone	*z_Next;	/* ZoneAry[] link if z_NFree non-zero */
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

#define FASTSLABREALLOC		0x02

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

/*
 * BIGCACHE caches oversized allocations.  Note that a linear search is
 * performed, so do not make the cache too large.
 *
 * BIGCACHE will garbage-collect excess space when the excess exceeds the
 * specified value.  A relatively large number should be used here because
 * garbage collection is expensive.
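 *
 * For example, with the settings below an oversized block is only eligible
 * for caching if it is no larger than BIGCACHE_LIMIT (1MB), and
 * handle_excess_big() does not begin unmapping the unused tails of
 * oversized allocations until the recorded excess passes
 * BIGCACHE_EXCESS (16MB).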
229 */ 230 #define BIGCACHE 16 231 #define BIGCACHE_MASK (BIGCACHE - 1) 232 #define BIGCACHE_LIMIT (1024 * 1024) /* size limit */ 233 #define BIGCACHE_EXCESS (16 * 1024 * 1024) /* garbage collect */ 234 235 #define SAFLAG_ZERO 0x0001 236 #define SAFLAG_PASSIVE 0x0002 237 238 /* 239 * Thread control 240 */ 241 242 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 243 244 #define MASSERT(exp) do { if (__predict_false(!(exp))) \ 245 _mpanic("assertion: %s in %s", \ 246 #exp, __func__); \ 247 } while (0) 248 249 /* 250 * Magazines 251 */ 252 253 #define M_MAX_ROUNDS 64 254 #define M_ZONE_ROUNDS 64 255 #define M_LOW_ROUNDS 32 256 #define M_INIT_ROUNDS 8 257 #define M_BURST_FACTOR 8 258 #define M_BURST_NSCALE 2 259 260 #define M_BURST 0x0001 261 #define M_BURST_EARLY 0x0002 262 263 struct magazine { 264 SLIST_ENTRY(magazine) nextmagazine; 265 266 int flags; 267 int capacity; /* Max rounds in this magazine */ 268 int rounds; /* Current number of free rounds */ 269 int burst_factor; /* Number of blocks to prefill with */ 270 int low_factor; /* Free till low_factor from full mag */ 271 void *objects[M_MAX_ROUNDS]; 272 }; 273 274 SLIST_HEAD(magazinelist, magazine); 275 276 static spinlock_t zone_mag_lock; 277 static spinlock_t depot_spinlock; 278 static struct magazine zone_magazine = { 279 .flags = M_BURST | M_BURST_EARLY, 280 .capacity = M_ZONE_ROUNDS, 281 .rounds = 0, 282 .burst_factor = M_BURST_FACTOR, 283 .low_factor = M_LOW_ROUNDS 284 }; 285 286 #define MAGAZINE_FULL(mp) (mp->rounds == mp->capacity) 287 #define MAGAZINE_NOTFULL(mp) (mp->rounds < mp->capacity) 288 #define MAGAZINE_EMPTY(mp) (mp->rounds == 0) 289 #define MAGAZINE_NOTEMPTY(mp) (mp->rounds != 0) 290 291 /* 292 * Each thread will have a pair of magazines per size-class (NZONES) 293 * The loaded magazine will support immediate allocations, the previous 294 * magazine will either be full or empty and can be swapped at need 295 */ 296 typedef struct magazine_pair { 297 struct magazine *loaded; 298 struct magazine *prev; 299 } magazine_pair; 300 301 /* A depot is a collection of magazines for a single zone. */ 302 typedef struct magazine_depot { 303 struct magazinelist full; 304 struct magazinelist empty; 305 spinlock_t lock; 306 } magazine_depot; 307 308 typedef struct thr_mags { 309 magazine_pair mags[NZONES]; 310 struct magazine *newmag; 311 int init; 312 } thr_mags; 313 314 /* 315 * With this attribute set, do not require a function call for accessing 316 * this variable when the code is compiled -fPIC. 317 * 318 * Must be empty for libc_rtld (similar to __thread). 
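 *
 * (With the initial-exec TLS model the thread_mags variable below is
 * reached through a fixed offset in the static TLS block instead of a
 * __tls_get_addr() call, which matters because it sits on the malloc
 * fast path.)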
319 */ 320 #ifdef __LIBC_RTLD 321 #define TLS_ATTRIBUTE 322 #else 323 #define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec"))) 324 #endif 325 326 static __thread thr_mags thread_mags TLS_ATTRIBUTE; 327 static pthread_key_t thread_mags_key; 328 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT; 329 static magazine_depot depots[NZONES]; 330 331 /* 332 * Fixed globals (not per-cpu) 333 */ 334 static const int ZoneSize = ZALLOC_ZONE_SIZE; 335 static const int ZoneLimit = ZALLOC_ZONE_LIMIT; 336 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE; 337 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1; 338 339 static int opt_madvise = 0; 340 static int opt_utrace = 0; 341 static int g_malloc_flags = 0; 342 static struct slglobaldata SLGlobalData; 343 static bigalloc_t bigalloc_array[BIGHSIZE]; 344 static spinlock_t bigspin_array[BIGXSIZE]; 345 static volatile void *bigcache_array[BIGCACHE]; /* atomic swap */ 346 static volatile size_t bigcache_size_array[BIGCACHE]; /* SMP races ok */ 347 static volatile int bigcache_index; /* SMP races ok */ 348 static int malloc_panic; 349 static size_t excess_alloc; /* excess big allocs */ 350 351 static void *_slaballoc(size_t size, int flags); 352 static void *_slabrealloc(void *ptr, size_t size); 353 static void _slabfree(void *ptr, int, bigalloc_t *); 354 static int _slabmemalign(void **memptr, size_t alignment, size_t size); 355 static void *_vmem_alloc(size_t bytes, size_t align, int flags); 356 static void _vmem_free(void *ptr, size_t bytes); 357 static void *magazine_alloc(struct magazine *, int *); 358 static int magazine_free(struct magazine *, void *); 359 static void *mtmagazine_alloc(int zi); 360 static int mtmagazine_free(int zi, void *); 361 static void mtmagazine_init(void); 362 static void mtmagazine_destructor(void *); 363 static slzone_t zone_alloc(int flags); 364 static void zone_free(void *z); 365 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 366 static void malloc_init(void) __constructor(101); 367 368 struct nmalloc_utrace { 369 void *p; 370 size_t s; 371 void *r; 372 }; 373 374 #define UTRACE(a, b, c) \ 375 if (opt_utrace) { \ 376 struct nmalloc_utrace ut = { \ 377 .p = (a), \ 378 .s = (b), \ 379 .r = (c) \ 380 }; \ 381 utrace(&ut, sizeof(ut)); \ 382 } 383 384 static void 385 malloc_init(void) 386 { 387 const char *p = NULL; 388 389 if (issetugid() == 0) 390 p = getenv("MALLOC_OPTIONS"); 391 392 for (; p != NULL && *p != '\0'; p++) { 393 switch(*p) { 394 case 'u': opt_utrace = 0; break; 395 case 'U': opt_utrace = 1; break; 396 case 'h': opt_madvise = 0; break; 397 case 'H': opt_madvise = 1; break; 398 case 'z': g_malloc_flags = 0; break; 399 case 'Z': g_malloc_flags = SAFLAG_ZERO; break; 400 default: 401 break; 402 } 403 } 404 405 UTRACE((void *) -1, 0, NULL); 406 } 407 408 /* 409 * We have to install a handler for nmalloc thread teardowns when 410 * the thread is created. We cannot delay this because destructors in 411 * sophisticated userland programs can call malloc() for the first time 412 * during their thread exit. 413 * 414 * This routine is called directly from pthreads. 415 */ 416 void 417 _nmalloc_thr_init(void) 418 { 419 static int init_once; 420 thr_mags *tp; 421 422 /* 423 * Disallow mtmagazine operations until the mtmagazine is 424 * initialized. 
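 *
 * (While tp->init is -1, mtmagazine_alloc() and mtmagazine_free()
 * return immediately, so any malloc() that happens before we finish
 * here, e.g. from the pthread calls below, simply falls through to
 * the global slab zones.)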
425 */ 426 tp = &thread_mags; 427 tp->init = -1; 428 429 if (init_once == 0) { 430 init_once = 1; 431 pthread_once(&thread_mags_once, mtmagazine_init); 432 } 433 pthread_setspecific(thread_mags_key, tp); 434 tp->init = 1; 435 } 436 437 void 438 _nmalloc_thr_prepfork(void) 439 { 440 if (__isthreaded) { 441 _SPINLOCK(&zone_mag_lock); 442 _SPINLOCK(&depot_spinlock); 443 } 444 } 445 446 void 447 _nmalloc_thr_parentfork(void) 448 { 449 if (__isthreaded) { 450 _SPINUNLOCK(&depot_spinlock); 451 _SPINUNLOCK(&zone_mag_lock); 452 } 453 } 454 455 void 456 _nmalloc_thr_childfork(void) 457 { 458 if (__isthreaded) { 459 _SPINUNLOCK(&depot_spinlock); 460 _SPINUNLOCK(&zone_mag_lock); 461 } 462 } 463 464 /* 465 * Thread locks. 466 */ 467 static __inline void 468 slgd_lock(slglobaldata_t slgd) 469 { 470 if (__isthreaded) 471 _SPINLOCK(&slgd->Spinlock); 472 } 473 474 static __inline void 475 slgd_unlock(slglobaldata_t slgd) 476 { 477 if (__isthreaded) 478 _SPINUNLOCK(&slgd->Spinlock); 479 } 480 481 static __inline void 482 depot_lock(magazine_depot *dp) 483 { 484 if (__isthreaded) 485 _SPINLOCK(&depot_spinlock); 486 #if 0 487 if (__isthreaded) 488 _SPINLOCK(&dp->lock); 489 #endif 490 } 491 492 static __inline void 493 depot_unlock(magazine_depot *dp) 494 { 495 if (__isthreaded) 496 _SPINUNLOCK(&depot_spinlock); 497 #if 0 498 if (__isthreaded) 499 _SPINUNLOCK(&dp->lock); 500 #endif 501 } 502 503 static __inline void 504 zone_magazine_lock(void) 505 { 506 if (__isthreaded) 507 _SPINLOCK(&zone_mag_lock); 508 } 509 510 static __inline void 511 zone_magazine_unlock(void) 512 { 513 if (__isthreaded) 514 _SPINUNLOCK(&zone_mag_lock); 515 } 516 517 static __inline void 518 swap_mags(magazine_pair *mp) 519 { 520 struct magazine *tmp; 521 tmp = mp->loaded; 522 mp->loaded = mp->prev; 523 mp->prev = tmp; 524 } 525 526 /* 527 * bigalloc hashing and locking support. 528 * 529 * Return an unmasked hash code for the passed pointer. 530 */ 531 static __inline int 532 _bigalloc_hash(void *ptr) 533 { 534 int hv; 535 536 hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^ 537 ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT)); 538 539 return(hv); 540 } 541 542 /* 543 * Lock the hash chain and return a pointer to its base for the specified 544 * address. 545 */ 546 static __inline bigalloc_t * 547 bigalloc_lock(void *ptr) 548 { 549 int hv = _bigalloc_hash(ptr); 550 bigalloc_t *bigp; 551 552 bigp = &bigalloc_array[hv & BIGHMASK]; 553 if (__isthreaded) 554 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 555 return(bigp); 556 } 557 558 /* 559 * Lock the hash chain and return a pointer to its base for the specified 560 * address. 561 * 562 * BUT, if the hash chain is empty, just return NULL and do not bother 563 * to lock anything. 564 */ 565 static __inline bigalloc_t * 566 bigalloc_check_and_lock(void *ptr) 567 { 568 int hv = _bigalloc_hash(ptr); 569 bigalloc_t *bigp; 570 571 bigp = &bigalloc_array[hv & BIGHMASK]; 572 if (*bigp == NULL) 573 return(NULL); 574 if (__isthreaded) { 575 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 576 } 577 return(bigp); 578 } 579 580 static __inline void 581 bigalloc_unlock(void *ptr) 582 { 583 int hv; 584 585 if (__isthreaded) { 586 hv = _bigalloc_hash(ptr); 587 _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]); 588 } 589 } 590 591 /* 592 * Find a bigcache entry that might work for the allocation. SMP races are 593 * ok here except for the swap (that is, it is ok if bigcache_size_array[i] 594 * is wrong or if a NULL or too-small big is returned). 
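 *
 * For example, if another thread swaps out bigcache_array[i] between our
 * read of bigcache_size_array[i] and the atomic_swap_ptr(), we simply get
 * back NULL (or a block that turns out to be too small, which the caller
 * releases again) and fall through to a normal mmap-backed allocation.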
595 * 596 * Generally speaking it is ok to find a large entry even if the bytes 597 * requested are relatively small (but still oversized), because we really 598 * don't know *what* the application is going to do with the buffer. 599 */ 600 static __inline 601 bigalloc_t 602 bigcache_find_alloc(size_t bytes) 603 { 604 bigalloc_t big = NULL; 605 size_t test; 606 int i; 607 608 for (i = 0; i < BIGCACHE; ++i) { 609 test = bigcache_size_array[i]; 610 if (bytes <= test) { 611 bigcache_size_array[i] = 0; 612 big = atomic_swap_ptr(&bigcache_array[i], NULL); 613 break; 614 } 615 } 616 return big; 617 } 618 619 /* 620 * Free a bigcache entry, possibly returning one that the caller really must 621 * free. This is used to cache recent oversized memory blocks. Only 622 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try 623 * to collect the biggest ones we can that are under the limit. 624 */ 625 static __inline 626 bigalloc_t 627 bigcache_find_free(bigalloc_t big) 628 { 629 int i; 630 int j; 631 int b; 632 633 b = ++bigcache_index; 634 for (i = 0; i < BIGCACHE; ++i) { 635 j = (b + i) & BIGCACHE_MASK; 636 if (bigcache_size_array[j] < big->bytes) { 637 bigcache_size_array[j] = big->bytes; 638 big = atomic_swap_ptr(&bigcache_array[j], big); 639 break; 640 } 641 } 642 return big; 643 } 644 645 static __inline 646 void 647 handle_excess_big(void) 648 { 649 int i; 650 bigalloc_t big; 651 bigalloc_t *bigp; 652 653 if (excess_alloc <= BIGCACHE_EXCESS) 654 return; 655 656 for (i = 0; i < BIGHSIZE; ++i) { 657 bigp = &bigalloc_array[i]; 658 if (*bigp == NULL) 659 continue; 660 if (__isthreaded) 661 _SPINLOCK(&bigspin_array[i & BIGXMASK]); 662 for (big = *bigp; big; big = big->next) { 663 if (big->active < big->bytes) { 664 MASSERT((big->active & PAGE_MASK) == 0); 665 MASSERT((big->bytes & PAGE_MASK) == 0); 666 munmap((char *)big->base + big->active, 667 big->bytes - big->active); 668 atomic_add_long(&excess_alloc, 669 big->active - big->bytes); 670 big->bytes = big->active; 671 } 672 } 673 if (__isthreaded) 674 _SPINUNLOCK(&bigspin_array[i & BIGXMASK]); 675 } 676 } 677 678 /* 679 * Calculate the zone index for the allocation request size and set the 680 * allocation request size to that particular zone's chunk size. 681 */ 682 static __inline int 683 zoneindex(size_t *bytes, size_t *chunking) 684 { 685 size_t n = (unsigned int)*bytes; /* unsigned for shift opt */ 686 687 /* 688 * This used to be 8-byte chunks and 16 zones for n < 128. 689 * However some instructions may require 16-byte alignment 690 * (aka SIMD) and programs might not request an aligned size 691 * (aka GCC-7), so change this as follows: 692 * 693 * 0-15 bytes 8-byte alignment in two zones (0-1) 694 * 16-127 bytes 16-byte alignment in four zones (3-10) 695 * zone index 2 and 11-15 are currently unused. 
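 *
 * Worked examples of the mapping below (request sizes in bytes):
 *
 *	request 10   -> rounds to 16,   chunking 8,   zone 16/8 - 1      = 1
 *	request 100  -> rounds to 112,  chunking 16,  zone 112/16 + 2    = 9
 *	request 300  -> rounds to 320,  chunking 32,  zone 320/32 + 15   = 25
 *	request 5000 -> rounds to 5120, chunking 512, zone 5120/512 + 47 = 57
 *
 * The largest slab-managed request, just under 16K, rounds to 16384 and
 * lands in zone 16384/1024 + 55 = 71, consistent with NZONES == 72.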
696 */ 697 if (n < 16) { 698 *bytes = n = (n + 7) & ~7; 699 *chunking = 8; 700 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 701 /* zones 0,1, zone 2 is unused */ 702 } 703 if (n < 128) { 704 *bytes = n = (n + 15) & ~15; 705 *chunking = 16; 706 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 707 /* zones 3-10, zones 11-15 unused */ 708 } 709 if (n < 256) { 710 *bytes = n = (n + 15) & ~15; 711 *chunking = 16; 712 return(n / 16 + 7); 713 } 714 if (n < 8192) { 715 if (n < 512) { 716 *bytes = n = (n + 31) & ~31; 717 *chunking = 32; 718 return(n / 32 + 15); 719 } 720 if (n < 1024) { 721 *bytes = n = (n + 63) & ~63; 722 *chunking = 64; 723 return(n / 64 + 23); 724 } 725 if (n < 2048) { 726 *bytes = n = (n + 127) & ~127; 727 *chunking = 128; 728 return(n / 128 + 31); 729 } 730 if (n < 4096) { 731 *bytes = n = (n + 255) & ~255; 732 *chunking = 256; 733 return(n / 256 + 39); 734 } 735 *bytes = n = (n + 511) & ~511; 736 *chunking = 512; 737 return(n / 512 + 47); 738 } 739 #if ZALLOC_ZONE_LIMIT > 8192 740 if (n < 16384) { 741 *bytes = n = (n + 1023) & ~1023; 742 *chunking = 1024; 743 return(n / 1024 + 55); 744 } 745 #endif 746 #if ZALLOC_ZONE_LIMIT > 16384 747 if (n < 32768) { 748 *bytes = n = (n + 2047) & ~2047; 749 *chunking = 2048; 750 return(n / 2048 + 63); 751 } 752 #endif 753 _mpanic("Unexpected byte count %zu", n); 754 return(0); 755 } 756 757 /* 758 * malloc() - call internal slab allocator 759 */ 760 void * 761 __malloc(size_t size) 762 { 763 void *ptr; 764 765 ptr = _slaballoc(size, 0); 766 if (ptr == NULL) 767 errno = ENOMEM; 768 else 769 UTRACE(0, size, ptr); 770 return(ptr); 771 } 772 773 #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) 774 775 /* 776 * calloc() - call internal slab allocator 777 */ 778 void * 779 __calloc(size_t number, size_t size) 780 { 781 void *ptr; 782 783 if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && 784 number > 0 && SIZE_MAX / number < size) { 785 errno = ENOMEM; 786 return(NULL); 787 } 788 789 ptr = _slaballoc(number * size, SAFLAG_ZERO); 790 if (ptr == NULL) 791 errno = ENOMEM; 792 else 793 UTRACE(0, number * size, ptr); 794 return(ptr); 795 } 796 797 /* 798 * realloc() (SLAB ALLOCATOR) 799 * 800 * We do not attempt to optimize this routine beyond reusing the same 801 * pointer if the new size fits within the chunking of the old pointer's 802 * zone. 803 */ 804 void * 805 __realloc(void *ptr, size_t size) 806 { 807 void *ret; 808 ret = _slabrealloc(ptr, size); 809 if (ret == NULL) 810 errno = ENOMEM; 811 else 812 UTRACE(ptr, size, ret); 813 return(ret); 814 } 815 816 /* 817 * aligned_alloc() 818 * 819 * Allocate (size) bytes with a alignment of (alignment). 820 */ 821 void * 822 __aligned_alloc(size_t alignment, size_t size) 823 { 824 void *ptr; 825 int rc; 826 827 ptr = NULL; 828 rc = _slabmemalign(&ptr, alignment, size); 829 if (rc) 830 errno = rc; 831 832 return (ptr); 833 } 834 835 /* 836 * posix_memalign() 837 * 838 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 839 * is a power of 2 >= sizeof(void *). 840 */ 841 int 842 __posix_memalign(void **memptr, size_t alignment, size_t size) 843 { 844 int rc; 845 846 /* 847 * OpenGroup spec issue 6 check 848 */ 849 if (alignment < sizeof(void *)) { 850 *memptr = NULL; 851 return(EINVAL); 852 } 853 854 rc = _slabmemalign(memptr, alignment, size); 855 856 return (rc); 857 } 858 859 /* 860 * The slab allocator will allocate on power-of-2 boundaries up to 861 * at least PAGE_SIZE. 
We use the zoneindex mechanic to find a 862 * zone matching the requirements, and _vmem_alloc() otherwise. 863 */ 864 static int 865 _slabmemalign(void **memptr, size_t alignment, size_t size) 866 { 867 bigalloc_t *bigp; 868 bigalloc_t big; 869 size_t chunking; 870 int zi __unused; 871 872 if (alignment < 1) { 873 *memptr = NULL; 874 return(EINVAL); 875 } 876 877 /* 878 * OpenGroup spec issue 6 checks 879 */ 880 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 881 *memptr = NULL; 882 return(EINVAL); 883 } 884 885 /* 886 * Our zone mechanism guarantees same-sized alignment for any 887 * power-of-2 allocation. If size is a power-of-2 and reasonable 888 * we can just call _slaballoc() and be done. We round size up 889 * to the nearest alignment boundary to improve our odds of 890 * it becoming a power-of-2 if it wasn't before. 891 */ 892 if (size <= alignment) 893 size = alignment; 894 else 895 size = (size + alignment - 1) & ~(size_t)(alignment - 1); 896 897 /* 898 * If we have overflowed above when rounding to the nearest alignment 899 * boundary, just return ENOMEM, size should be == N * sizeof(void *). 900 * 901 * Power-of-2 allocations up to 8KB will be aligned to the allocation 902 * size and _slaballoc() can simply be used. Please see line 1082 903 * for this special case: 'Align the storage in the zone based on 904 * the chunking' has a special case for powers of 2. 905 */ 906 if (size == 0) 907 return(ENOMEM); 908 909 if (size <= PAGE_SIZE*2 && (size | (size - 1)) + 1 == (size << 1)) { 910 *memptr = _slaballoc(size, 0); 911 return(*memptr ? 0 : ENOMEM); 912 } 913 914 /* 915 * Otherwise locate a zone with a chunking that matches 916 * the requested alignment, within reason. Consider two cases: 917 * 918 * (1) A 1K allocation on a 32-byte alignment. The first zoneindex 919 * we find will be the best fit because the chunking will be 920 * greater or equal to the alignment. 921 * 922 * (2) A 513 allocation on a 256-byte alignment. In this case 923 * the first zoneindex we find will be for 576 byte allocations 924 * with a chunking of 64, which is not sufficient. To fix this 925 * we simply find the nearest power-of-2 >= size and use the 926 * same side-effect of _slaballoc() which guarantees 927 * same-alignment on a power-of-2 allocation. 928 */ 929 if (size < PAGE_SIZE) { 930 zi = zoneindex(&size, &chunking); 931 if (chunking >= alignment) { 932 *memptr = _slaballoc(size, 0); 933 return(*memptr ? 0 : ENOMEM); 934 } 935 if (size >= 1024) 936 alignment = 1024; 937 if (size >= 16384) 938 alignment = 16384; 939 while (alignment < size) 940 alignment <<= 1; 941 *memptr = _slaballoc(alignment, 0); 942 return(*memptr ? 0 : ENOMEM); 943 } 944 945 /* 946 * If the slab allocator cannot handle it use vmem_alloc(). 947 * 948 * Alignment must be adjusted up to at least PAGE_SIZE in this case. 
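 *
 * To make the cases above concrete (illustrative only, assuming 4K pages):
 *
 *	posix_memalign(&p, 16, 100)	size rounds to 112; the zone's
 *					chunking (16) already satisfies the
 *					alignment, so this is a plain
 *					_slaballoc(112).
 *	posix_memalign(&p, 256, 513)	rounds to 768; chunking 64 is too
 *					small, so the nearest power of 2
 *					>= 768 (1024) is allocated instead.
 *	posix_memalign(&p, 4096, 20000)	handled below: size is rounded to
 *					20480, the bigcache is consulted,
 *					and otherwise _vmem_alloc() maps it
 *					with page alignment.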
949 */ 950 if (alignment < PAGE_SIZE) 951 alignment = PAGE_SIZE; 952 if (size < alignment) 953 size = alignment; 954 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 955 if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) { 956 big = bigcache_find_alloc(size); 957 if (big && big->bytes < size) { 958 _slabfree(big->base, FASTSLABREALLOC, &big); 959 big = NULL; 960 } 961 if (big) { 962 *memptr = big->base; 963 big->active = size; 964 if (big->active < big->bytes) { 965 atomic_add_long(&excess_alloc, 966 big->bytes - big->active); 967 } 968 bigp = bigalloc_lock(*memptr); 969 big->next = *bigp; 970 *bigp = big; 971 bigalloc_unlock(*memptr); 972 handle_excess_big(); 973 return(0); 974 } 975 } 976 *memptr = _vmem_alloc(size, alignment, 0); 977 if (*memptr == NULL) 978 return(ENOMEM); 979 980 big = _slaballoc(sizeof(struct bigalloc), 0); 981 if (big == NULL) { 982 _vmem_free(*memptr, size); 983 *memptr = NULL; 984 return(ENOMEM); 985 } 986 bigp = bigalloc_lock(*memptr); 987 big->base = *memptr; 988 big->active = size; 989 big->bytes = size; /* no excess */ 990 big->next = *bigp; 991 *bigp = big; 992 bigalloc_unlock(*memptr); 993 994 return(0); 995 } 996 997 /* 998 * free() (SLAB ALLOCATOR) - do the obvious 999 */ 1000 void 1001 __free(void *ptr) 1002 { 1003 UTRACE(ptr, 0, 0); 1004 _slabfree(ptr, 0, NULL); 1005 } 1006 1007 /* 1008 * _slaballoc() (SLAB ALLOCATOR) 1009 * 1010 * Allocate memory via the slab allocator. If the request is too large, 1011 * or if it page-aligned beyond a certain size, we fall back to the 1012 * KMEM subsystem 1013 */ 1014 static void * 1015 _slaballoc(size_t size, int flags) 1016 { 1017 slzone_t z; 1018 slchunk_t chunk; 1019 slglobaldata_t slgd; 1020 size_t chunking; 1021 int zi; 1022 int off; 1023 void *obj; 1024 1025 /* 1026 * Handle the degenerate size == 0 case. Yes, this does happen. 1027 * Return a special pointer. This is to maintain compatibility with 1028 * the original malloc implementation. Certain devices, such as the 1029 * adaptec driver, not only allocate 0 bytes, they check for NULL and 1030 * also realloc() later on. Joy. 1031 */ 1032 if (size == 0) 1033 size = 1; 1034 1035 /* Capture global flags */ 1036 flags |= g_malloc_flags; 1037 1038 /* 1039 * Handle large allocations directly. There should not be very many 1040 * of these so performance is not a big issue. 1041 * 1042 * The backend allocator is pretty nasty on a SMP system. Use the 1043 * slab allocator for one and two page-sized chunks even though we 1044 * lose some efficiency. 1045 * 1046 * NOTE: Please see posix_memalign around line 864, which assumes 1047 * that power-of-2 allocations of PAGE_SIZE and PAGE_SIZE*2 1048 * can use _slaballoc() and be aligned to the same. The 1049 * zone cache can be used for this case, bigalloc does not 1050 * have to be used. 1051 */ 1052 if (size >= ZoneLimit || 1053 ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) { 1054 bigalloc_t big; 1055 bigalloc_t *bigp; 1056 1057 /* 1058 * Page-align and cache-color in case of virtually indexed 1059 * physically tagged L1 caches (aka SandyBridge). No sweat 1060 * otherwise, so just do it. 1061 * 1062 * (don't count as excess). 1063 */ 1064 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1065 1066 /* 1067 * If we have overflowed above when rounding to the page 1068 * boundary, something has passed us (size_t)[-PAGE_MASK..-1] 1069 * so just return NULL, size at this point should be >= 0. 
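 *
 * (e.g. with a 64-bit size_t a request of SIZE_MAX rounds to
 * (SIZE_MAX + PAGE_MASK) & ~PAGE_MASK == 0, which is caught here
 * instead of turning into a zero-length mapping.)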
1070 */ 1071 if (size == 0) 1072 return (NULL); 1073 1074 if ((size & (PAGE_SIZE * 2 - 1)) == 0) 1075 size += PAGE_SIZE; 1076 1077 /* 1078 * Try to reuse a cached big block to avoid mmap'ing. If it 1079 * turns out not to fit our requirements we throw it away 1080 * and allocate normally. 1081 */ 1082 big = NULL; 1083 if (size <= BIGCACHE_LIMIT) { 1084 big = bigcache_find_alloc(size); 1085 if (big && big->bytes < size) { 1086 _slabfree(big->base, FASTSLABREALLOC, &big); 1087 big = NULL; 1088 } 1089 } 1090 if (big) { 1091 chunk = big->base; 1092 if (flags & SAFLAG_ZERO) 1093 bzero(chunk, size); 1094 } else { 1095 chunk = _vmem_alloc(size, PAGE_SIZE, flags); 1096 if (chunk == NULL) 1097 return(NULL); 1098 1099 big = _slaballoc(sizeof(struct bigalloc), 0); 1100 if (big == NULL) { 1101 _vmem_free(chunk, size); 1102 return(NULL); 1103 } 1104 big->base = chunk; 1105 big->bytes = size; 1106 } 1107 big->active = size; 1108 1109 bigp = bigalloc_lock(chunk); 1110 if (big->active < big->bytes) { 1111 atomic_add_long(&excess_alloc, 1112 big->bytes - big->active); 1113 } 1114 big->next = *bigp; 1115 *bigp = big; 1116 bigalloc_unlock(chunk); 1117 handle_excess_big(); 1118 1119 return(chunk); 1120 } 1121 1122 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 1123 zi = zoneindex(&size, &chunking); 1124 MASSERT(zi < NZONES); 1125 1126 obj = mtmagazine_alloc(zi); 1127 if (obj != NULL) { 1128 if (flags & SAFLAG_ZERO) 1129 bzero(obj, size); 1130 return (obj); 1131 } 1132 1133 slgd = &SLGlobalData; 1134 slgd_lock(slgd); 1135 1136 /* 1137 * Attempt to allocate out of an existing zone. If all zones are 1138 * exhausted pull one off the free list or allocate a new one. 1139 */ 1140 if ((z = slgd->ZoneAry[zi]) == NULL) { 1141 z = zone_alloc(flags); 1142 if (z == NULL) 1143 goto fail; 1144 1145 /* 1146 * How big is the base structure? 1147 */ 1148 off = sizeof(struct slzone); 1149 1150 /* 1151 * Align the storage in the zone based on the chunking. 1152 * 1153 * Guarantee power-of-2 alignment for power-of-2-sized 1154 * chunks. Otherwise align based on the chunking size 1155 * (typically 8 or 16 bytes for small allocations). 1156 * 1157 * NOTE: Allocations >= ZoneLimit are governed by the 1158 * bigalloc code and typically only guarantee page-alignment. 1159 * 1160 * Set initial conditions for UIndex near the zone header 1161 * to reduce unecessary page faults, vs semi-randomization 1162 * to improve L1 cache saturation. 1163 * 1164 * NOTE: Please see posix_memalign around line 864-ish, which 1165 * assumes that power-of-2 allocations of PAGE_SIZE 1166 * and PAGE_SIZE*2 can use _slaballoc() and be aligned 1167 * to the same. The zone cache can be used for this 1168 * case, bigalloc does not have to be used. 1169 * 1170 * ALL power-of-2 requests that fall through to this 1171 * code use this rule (conditionals above limit this 1172 * to <= PAGE_SIZE*2. 
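 *
 * For example, a zone of 1024-byte chunks (a power of 2) uses
 * off = roundup2(sizeof(struct slzone), 1024), so every chunk laid
 * out from z_BasePtr is 1024-byte aligned; a zone of 112-byte chunks
 * only rounds the header up to its 16-byte chunking.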
1173 */ 1174 if ((size | (size - 1)) + 1 == (size << 1)) 1175 off = roundup2(off, size); 1176 else 1177 off = roundup2(off, chunking); 1178 z->z_Magic = ZALLOC_SLAB_MAGIC; 1179 z->z_ZoneIndex = zi; 1180 z->z_NMax = (ZoneSize - off) / size; 1181 z->z_NFree = z->z_NMax; 1182 z->z_BasePtr = (char *)z + off; 1183 z->z_UIndex = z->z_UEndIndex = 0; 1184 z->z_ChunkSize = size; 1185 z->z_FirstFreePg = ZonePageCount; 1186 z->z_Next = slgd->ZoneAry[zi]; 1187 slgd->ZoneAry[zi] = z; 1188 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1189 flags &= ~SAFLAG_ZERO; /* already zero'd */ 1190 flags |= SAFLAG_PASSIVE; 1191 } 1192 1193 /* 1194 * Slide the base index for initial allocations out of the 1195 * next zone we create so we do not over-weight the lower 1196 * part of the cpu memory caches. 1197 */ 1198 slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE) 1199 & (ZALLOC_MAX_ZONE_SIZE - 1); 1200 } 1201 1202 /* 1203 * Ok, we have a zone from which at least one chunk is available. 1204 * 1205 * Remove us from the ZoneAry[] when we become empty 1206 */ 1207 MASSERT(z->z_NFree > 0); 1208 1209 if (--z->z_NFree == 0) { 1210 slgd->ZoneAry[zi] = z->z_Next; 1211 z->z_Next = NULL; 1212 } 1213 1214 /* 1215 * Locate a chunk in a free page. This attempts to localize 1216 * reallocations into earlier pages without us having to sort 1217 * the chunk list. A chunk may still overlap a page boundary. 1218 */ 1219 while (z->z_FirstFreePg < ZonePageCount) { 1220 if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { 1221 MASSERT((uintptr_t)chunk & ZoneMask); 1222 z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; 1223 goto done; 1224 } 1225 ++z->z_FirstFreePg; 1226 } 1227 1228 /* 1229 * No chunks are available but NFree said we had some memory, 1230 * so it must be available in the never-before-used-memory 1231 * area governed by UIndex. The consequences are very 1232 * serious if our zone got corrupted so we use an explicit 1233 * panic rather then a KASSERT. 1234 */ 1235 chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size); 1236 1237 if (++z->z_UIndex == z->z_NMax) 1238 z->z_UIndex = 0; 1239 if (z->z_UIndex == z->z_UEndIndex) { 1240 if (z->z_NFree != 0) 1241 _mpanic("slaballoc: corrupted zone"); 1242 } 1243 1244 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1245 flags &= ~SAFLAG_ZERO; 1246 flags |= SAFLAG_PASSIVE; 1247 } 1248 1249 done: 1250 slgd_unlock(slgd); 1251 if (flags & SAFLAG_ZERO) 1252 bzero(chunk, size); 1253 return(chunk); 1254 fail: 1255 slgd_unlock(slgd); 1256 return(NULL); 1257 } 1258 1259 /* 1260 * Reallocate memory within the chunk 1261 */ 1262 static void * 1263 _slabrealloc(void *ptr, size_t size) 1264 { 1265 bigalloc_t *bigp; 1266 void *nptr; 1267 slzone_t z; 1268 size_t chunking; 1269 1270 if (ptr == NULL) { 1271 return(_slaballoc(size, 0)); 1272 } 1273 1274 if (size == 0) 1275 size = 1; 1276 1277 /* 1278 * Handle oversized allocations. 1279 */ 1280 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1281 bigalloc_t big; 1282 size_t bigbytes; 1283 1284 while ((big = *bigp) != NULL) { 1285 if (big->base == ptr) { 1286 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1287 bigbytes = big->bytes; 1288 1289 /* 1290 * If it already fits determine if it makes 1291 * sense to shrink/reallocate. Try to optimize 1292 * programs which stupidly make incremental 1293 * reallocations larger or smaller by scaling 1294 * the allocation. Also deal with potential 1295 * coloring. 
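 *
 * (e.g. a 1MB backing store being realloc()d to any size that rounds
 * to between 512KB and 1MB keeps its pointer; only big->active and
 * the excess accounting are adjusted.)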
1296 */ 1297 if (size >= (bigbytes >> 1) && 1298 size <= bigbytes) { 1299 if (big->active != size) { 1300 atomic_add_long(&excess_alloc, 1301 big->active - 1302 size); 1303 } 1304 big->active = size; 1305 bigalloc_unlock(ptr); 1306 return(ptr); 1307 } 1308 1309 /* 1310 * For large reallocations, allocate more space 1311 * than we need to try to avoid excessive 1312 * reallocations later on. 1313 */ 1314 chunking = size + (size >> 3); 1315 chunking = (chunking + PAGE_MASK) & 1316 ~(size_t)PAGE_MASK; 1317 1318 /* 1319 * Try to allocate adjacently in case the 1320 * program is idiotically realloc()ing a 1321 * huge memory block just slightly bigger. 1322 * (llvm's llc tends to do this a lot). 1323 * 1324 * (MAP_TRYFIXED forces mmap to fail if there 1325 * is already something at the address). 1326 */ 1327 if (chunking > bigbytes) { 1328 char *addr; 1329 int errno_save = errno; 1330 1331 addr = mmap((char *)ptr + bigbytes, 1332 chunking - bigbytes, 1333 PROT_READ|PROT_WRITE, 1334 MAP_PRIVATE|MAP_ANON| 1335 MAP_TRYFIXED, 1336 -1, 0); 1337 errno = errno_save; 1338 if (addr == (char *)ptr + bigbytes) { 1339 atomic_add_long(&excess_alloc, 1340 big->active - 1341 big->bytes + 1342 chunking - 1343 size); 1344 big->bytes = chunking; 1345 big->active = size; 1346 bigalloc_unlock(ptr); 1347 1348 return(ptr); 1349 } 1350 MASSERT((void *)addr == MAP_FAILED); 1351 } 1352 1353 /* 1354 * Failed, unlink big and allocate fresh. 1355 * (note that we have to leave (big) intact 1356 * in case the slaballoc fails). 1357 */ 1358 *bigp = big->next; 1359 bigalloc_unlock(ptr); 1360 if ((nptr = _slaballoc(size, 0)) == NULL) { 1361 /* Relink block */ 1362 bigp = bigalloc_lock(ptr); 1363 big->next = *bigp; 1364 *bigp = big; 1365 bigalloc_unlock(ptr); 1366 return(NULL); 1367 } 1368 if (size > bigbytes) 1369 size = bigbytes; 1370 bcopy(ptr, nptr, size); 1371 atomic_add_long(&excess_alloc, big->active - 1372 big->bytes); 1373 _slabfree(ptr, FASTSLABREALLOC, &big); 1374 1375 return(nptr); 1376 } 1377 bigp = &big->next; 1378 } 1379 bigalloc_unlock(ptr); 1380 handle_excess_big(); 1381 } 1382 1383 /* 1384 * Get the original allocation's zone. If the new request winds 1385 * up using the same chunk size we do not have to do anything. 1386 * 1387 * NOTE: We don't have to lock the globaldata here, the fields we 1388 * access here will not change at least as long as we have control 1389 * over the allocation. 1390 */ 1391 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1392 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1393 1394 /* 1395 * Use zoneindex() to chunk-align the new size, as long as the 1396 * new size is not too large. 1397 */ 1398 if (size < ZoneLimit) { 1399 zoneindex(&size, &chunking); 1400 if (z->z_ChunkSize == size) { 1401 return(ptr); 1402 } 1403 } 1404 1405 /* 1406 * Allocate memory for the new request size and copy as appropriate. 1407 */ 1408 if ((nptr = _slaballoc(size, 0)) != NULL) { 1409 if (size > z->z_ChunkSize) 1410 size = z->z_ChunkSize; 1411 bcopy(ptr, nptr, size); 1412 _slabfree(ptr, 0, NULL); 1413 } 1414 1415 return(nptr); 1416 } 1417 1418 /* 1419 * free (SLAB ALLOCATOR) 1420 * 1421 * Free a memory block previously allocated by malloc. Note that we do not 1422 * attempt to uplodate ks_loosememuse as MP races could prevent us from 1423 * checking memory limits in malloc. 1424 * 1425 * flags: 1426 * FASTSLABREALLOC Fast call from realloc, *rbigp already 1427 * unlinked. 
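 *
 *	(i.e. the bigalloc entry is not, or is no longer, on its hash
 *	chain, so the lookup is skipped and control goes straight to
 *	the big-block caching/release path at fastslabrealloc.)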
1428 * 1429 * MPSAFE 1430 */ 1431 static void 1432 _slabfree(void *ptr, int flags, bigalloc_t *rbigp) 1433 { 1434 slzone_t z; 1435 slchunk_t chunk; 1436 bigalloc_t big; 1437 bigalloc_t *bigp; 1438 slglobaldata_t slgd; 1439 size_t size; 1440 int zi; 1441 int pgno; 1442 1443 /* Fast realloc path for big allocations */ 1444 if (flags & FASTSLABREALLOC) { 1445 big = *rbigp; 1446 goto fastslabrealloc; 1447 } 1448 1449 /* 1450 * Handle NULL frees and special 0-byte allocations 1451 */ 1452 if (ptr == NULL) 1453 return; 1454 1455 /* 1456 * Handle oversized allocations. 1457 */ 1458 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1459 while ((big = *bigp) != NULL) { 1460 if (big->base == ptr) { 1461 *bigp = big->next; 1462 atomic_add_long(&excess_alloc, big->active - 1463 big->bytes); 1464 bigalloc_unlock(ptr); 1465 1466 /* 1467 * Try to stash the block we are freeing, 1468 * potentially receiving another block in 1469 * return which must be freed. 1470 */ 1471 fastslabrealloc: 1472 if (big->bytes <= BIGCACHE_LIMIT) { 1473 big = bigcache_find_free(big); 1474 if (big == NULL) 1475 return; 1476 } 1477 ptr = big->base; /* reload */ 1478 size = big->bytes; 1479 _slabfree(big, 0, NULL); 1480 _vmem_free(ptr, size); 1481 return; 1482 } 1483 bigp = &big->next; 1484 } 1485 bigalloc_unlock(ptr); 1486 handle_excess_big(); 1487 } 1488 1489 /* 1490 * Zone case. Figure out the zone based on the fact that it is 1491 * ZoneSize aligned. 1492 */ 1493 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1494 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1495 1496 size = z->z_ChunkSize; 1497 zi = z->z_ZoneIndex; 1498 1499 if (g_malloc_flags & SAFLAG_ZERO) 1500 bzero(ptr, size); 1501 1502 if (mtmagazine_free(zi, ptr) == 0) 1503 return; 1504 1505 pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; 1506 chunk = ptr; 1507 slgd = &SLGlobalData; 1508 slgd_lock(slgd); 1509 1510 /* 1511 * Add this free non-zero'd chunk to a linked list for reuse, adjust 1512 * z_FirstFreePg. 1513 */ 1514 chunk->c_Next = z->z_PageAry[pgno]; 1515 z->z_PageAry[pgno] = chunk; 1516 if (z->z_FirstFreePg > pgno) 1517 z->z_FirstFreePg = pgno; 1518 1519 /* 1520 * Bump the number of free chunks. If it becomes non-zero the zone 1521 * must be added back onto the appropriate list. 1522 */ 1523 if (z->z_NFree++ == 0) { 1524 z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; 1525 slgd->ZoneAry[z->z_ZoneIndex] = z; 1526 } 1527 1528 /* 1529 * If the zone becomes totally free then release it. 1530 */ 1531 if (z->z_NFree == z->z_NMax) { 1532 slzone_t *pz; 1533 1534 pz = &slgd->ZoneAry[z->z_ZoneIndex]; 1535 while (z != *pz) 1536 pz = &(*pz)->z_Next; 1537 *pz = z->z_Next; 1538 z->z_Magic = -1; 1539 z->z_Next = NULL; 1540 zone_free(z); 1541 /* slgd lock released */ 1542 return; 1543 } 1544 slgd_unlock(slgd); 1545 } 1546 1547 /* 1548 * Allocate and return a magazine. NULL is returned and *burst is adjusted 1549 * if the magazine is empty. 
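 *
 * With M_BURST_EARLY set the burst factor also decays by M_BURST_NSCALE
 * each time an empty magazine is hit with a burst request, e.g. the zone
 * magazine starts at 8 and steps down through 6, 4 and 2 before being
 * clamped to 1, at which point bursting is turned off.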
1550 */ 1551 static __inline void * 1552 magazine_alloc(struct magazine *mp, int *burst) 1553 { 1554 void *obj; 1555 1556 if (mp == NULL) 1557 return(NULL); 1558 if (MAGAZINE_NOTEMPTY(mp)) { 1559 obj = mp->objects[--mp->rounds]; 1560 return(obj); 1561 } 1562 1563 /* 1564 * Return burst factor to caller along with NULL 1565 */ 1566 if ((mp->flags & M_BURST) && (burst != NULL)) { 1567 *burst = mp->burst_factor; 1568 } 1569 /* Reduce burst factor by NSCALE; if it hits 1, disable BURST */ 1570 if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) && 1571 (burst != NULL)) { 1572 mp->burst_factor -= M_BURST_NSCALE; 1573 if (mp->burst_factor <= 1) { 1574 mp->burst_factor = 1; 1575 mp->flags &= ~(M_BURST); 1576 mp->flags &= ~(M_BURST_EARLY); 1577 } 1578 } 1579 return (NULL); 1580 } 1581 1582 static __inline int 1583 magazine_free(struct magazine *mp, void *p) 1584 { 1585 if (mp != NULL && MAGAZINE_NOTFULL(mp)) { 1586 mp->objects[mp->rounds++] = p; 1587 return 0; 1588 } 1589 1590 return -1; 1591 } 1592 1593 static void * 1594 mtmagazine_alloc(int zi) 1595 { 1596 thr_mags *tp; 1597 struct magazine *mp, *emptymag; 1598 magazine_depot *d; 1599 void *obj; 1600 1601 /* 1602 * Do not try to access per-thread magazines while the mtmagazine 1603 * is being initialized or destroyed. 1604 */ 1605 tp = &thread_mags; 1606 if (tp->init < 0) 1607 return(NULL); 1608 1609 /* 1610 * Primary per-thread allocation loop 1611 */ 1612 for (;;) { 1613 /* 1614 * If the loaded magazine has rounds, allocate and return 1615 */ 1616 mp = tp->mags[zi].loaded; 1617 obj = magazine_alloc(mp, NULL); 1618 if (obj) 1619 break; 1620 1621 /* 1622 * If the prev magazine is full, swap with the loaded 1623 * magazine and retry. 1624 */ 1625 mp = tp->mags[zi].prev; 1626 if (mp && MAGAZINE_FULL(mp)) { 1627 MASSERT(mp->rounds != 0); 1628 swap_mags(&tp->mags[zi]); /* prev now empty */ 1629 continue; 1630 } 1631 1632 /* 1633 * Try to get a full magazine from the depot. Cycle 1634 * through depot(full)->loaded->prev->depot(empty). 1635 * Retry if a full magazine was available from the depot. 1636 * 1637 * Return NULL (caller will fall through) if no magazines 1638 * can be found anywhere. 1639 */ 1640 d = &depots[zi]; 1641 depot_lock(d); 1642 emptymag = tp->mags[zi].prev; 1643 if (emptymag) 1644 SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine); 1645 tp->mags[zi].prev = tp->mags[zi].loaded; 1646 mp = SLIST_FIRST(&d->full); /* loaded magazine */ 1647 tp->mags[zi].loaded = mp; 1648 if (mp) { 1649 SLIST_REMOVE_HEAD(&d->full, nextmagazine); 1650 MASSERT(MAGAZINE_NOTEMPTY(mp)); 1651 depot_unlock(d); 1652 continue; 1653 } 1654 depot_unlock(d); 1655 break; 1656 } 1657 1658 return (obj); 1659 } 1660 1661 static int 1662 mtmagazine_free(int zi, void *ptr) 1663 { 1664 thr_mags *tp; 1665 struct magazine *mp, *loadedmag; 1666 magazine_depot *d; 1667 int rc = -1; 1668 1669 /* 1670 * Do not try to access per-thread magazines while the mtmagazine 1671 * is being initialized or destroyed. 1672 */ 1673 tp = &thread_mags; 1674 if (tp->init < 0) 1675 return(-1); 1676 1677 /* 1678 * Primary per-thread freeing loop 1679 */ 1680 for (;;) { 1681 /* 1682 * Make sure a new magazine is available in case we have 1683 * to use it. Staging the newmag allows us to avoid 1684 * some locking/reentrancy complexity. 1685 * 1686 * Temporarily disable the per-thread caches for this 1687 * allocation to avoid reentrancy and/or to avoid a 1688 * stack overflow if the [zi] happens to be the same that 1689 * would be used to allocate the new magazine. 
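 *
 * (tp->init is set to -1 around the nested _slaballoc() below, so that
 * recursive allocation bypasses the per-thread magazines entirely and
 * is satisfied from the global zones.)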
1690 */ 1691 if (tp->newmag == NULL) { 1692 tp->init = -1; 1693 tp->newmag = _slaballoc(sizeof(struct magazine), 1694 SAFLAG_ZERO); 1695 tp->init = 1; 1696 if (tp->newmag == NULL) { 1697 rc = -1; 1698 break; 1699 } 1700 } 1701 1702 /* 1703 * If the loaded magazine has space, free directly to it 1704 */ 1705 rc = magazine_free(tp->mags[zi].loaded, ptr); 1706 if (rc == 0) 1707 break; 1708 1709 /* 1710 * If the prev magazine is empty, swap with the loaded 1711 * magazine and retry. 1712 */ 1713 mp = tp->mags[zi].prev; 1714 if (mp && MAGAZINE_EMPTY(mp)) { 1715 MASSERT(mp->rounds == 0); 1716 swap_mags(&tp->mags[zi]); /* prev now full */ 1717 continue; 1718 } 1719 1720 /* 1721 * Try to get an empty magazine from the depot. Cycle 1722 * through depot(empty)->loaded->prev->depot(full). 1723 * Retry if an empty magazine was available from the depot. 1724 */ 1725 d = &depots[zi]; 1726 depot_lock(d); 1727 1728 if ((loadedmag = tp->mags[zi].prev) != NULL) 1729 SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine); 1730 tp->mags[zi].prev = tp->mags[zi].loaded; 1731 mp = SLIST_FIRST(&d->empty); 1732 if (mp) { 1733 tp->mags[zi].loaded = mp; 1734 SLIST_REMOVE_HEAD(&d->empty, nextmagazine); 1735 MASSERT(MAGAZINE_NOTFULL(mp)); 1736 } else { 1737 mp = tp->newmag; 1738 tp->newmag = NULL; 1739 mp->capacity = M_MAX_ROUNDS; 1740 mp->rounds = 0; 1741 mp->flags = 0; 1742 tp->mags[zi].loaded = mp; 1743 } 1744 depot_unlock(d); 1745 } 1746 1747 return rc; 1748 } 1749 1750 static void 1751 mtmagazine_init(void) 1752 { 1753 int error; 1754 1755 error = pthread_key_create(&thread_mags_key, mtmagazine_destructor); 1756 if (error) 1757 abort(); 1758 } 1759 1760 /* 1761 * This function is only used by the thread exit destructor 1762 */ 1763 static void 1764 mtmagazine_drain(struct magazine *mp) 1765 { 1766 void *obj; 1767 1768 while (MAGAZINE_NOTEMPTY(mp)) { 1769 obj = magazine_alloc(mp, NULL); 1770 _slabfree(obj, 0, NULL); 1771 } 1772 } 1773 1774 /* 1775 * mtmagazine_destructor() 1776 * 1777 * When a thread exits, we reclaim all its resources; all its magazines are 1778 * drained and the structures are freed. 1779 * 1780 * WARNING! The destructor can be called multiple times if the larger user 1781 * program has its own destructors which run after ours which 1782 * allocate or free memory. 1783 */ 1784 static void 1785 mtmagazine_destructor(void *thrp) 1786 { 1787 thr_mags *tp = thrp; 1788 struct magazine *mp; 1789 int i; 1790 1791 /* 1792 * Prevent further use of mtmagazines while we are destructing 1793 * them, as well as for any destructors which are run after us 1794 * prior to the thread actually being destroyed. 1795 */ 1796 tp->init = -1; 1797 1798 for (i = 0; i < NZONES; i++) { 1799 mp = tp->mags[i].loaded; 1800 tp->mags[i].loaded = NULL; 1801 if (mp) { 1802 if (MAGAZINE_NOTEMPTY(mp)) 1803 mtmagazine_drain(mp); 1804 _slabfree(mp, 0, NULL); 1805 } 1806 1807 mp = tp->mags[i].prev; 1808 tp->mags[i].prev = NULL; 1809 if (mp) { 1810 if (MAGAZINE_NOTEMPTY(mp)) 1811 mtmagazine_drain(mp); 1812 _slabfree(mp, 0, NULL); 1813 } 1814 } 1815 1816 if (tp->newmag) { 1817 mp = tp->newmag; 1818 tp->newmag = NULL; 1819 _slabfree(mp, 0, NULL); 1820 } 1821 } 1822 1823 /* 1824 * zone_alloc() 1825 * 1826 * Attempt to allocate a zone from the zone magazine; the zone magazine has 1827 * M_BURST_EARLY enabled, so honor the burst request from the magazine. 
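 *
 * For example, when the magazine is empty and reports a burst of 8, a
 * single _vmem_alloc() maps 8 * ZoneSize bytes; the first zone is
 * returned to the caller and the remaining 7 are stuffed back into the
 * zone magazine for later zone_alloc() calls.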
1828 */ 1829 static slzone_t 1830 zone_alloc(int flags) 1831 { 1832 slglobaldata_t slgd = &SLGlobalData; 1833 int burst = 1; 1834 int i, j; 1835 slzone_t z; 1836 1837 zone_magazine_lock(); 1838 slgd_unlock(slgd); 1839 1840 z = magazine_alloc(&zone_magazine, &burst); 1841 if (z == NULL && burst == 1) { 1842 zone_magazine_unlock(); 1843 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1844 } else if (z == NULL) { 1845 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1846 if (z) { 1847 for (i = 1; i < burst; i++) { 1848 j = magazine_free(&zone_magazine, 1849 (char *) z + (ZoneSize * i)); 1850 MASSERT(j == 0); 1851 } 1852 } 1853 zone_magazine_unlock(); 1854 } else { 1855 z->z_Flags |= SLZF_UNOTZEROD; 1856 zone_magazine_unlock(); 1857 } 1858 slgd_lock(slgd); 1859 return z; 1860 } 1861 1862 /* 1863 * zone_free() 1864 * 1865 * Release a zone and unlock the slgd lock. 1866 */ 1867 static void 1868 zone_free(void *z) 1869 { 1870 slglobaldata_t slgd = &SLGlobalData; 1871 void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {}; 1872 int i, j; 1873 1874 zone_magazine_lock(); 1875 slgd_unlock(slgd); 1876 1877 bzero(z, sizeof(struct slzone)); 1878 1879 if (opt_madvise) 1880 madvise(z, ZoneSize, MADV_FREE); 1881 1882 i = magazine_free(&zone_magazine, z); 1883 1884 /* 1885 * If we failed to free, collect excess magazines; release the zone 1886 * magazine lock, and then free to the system via _vmem_free. Re-enable 1887 * BURST mode for the magazine. 1888 */ 1889 if (i == -1) { 1890 j = zone_magazine.rounds - zone_magazine.low_factor; 1891 for (i = 0; i < j; i++) { 1892 excess[i] = magazine_alloc(&zone_magazine, NULL); 1893 MASSERT(excess[i] != NULL); 1894 } 1895 1896 zone_magazine_unlock(); 1897 1898 for (i = 0; i < j; i++) 1899 _vmem_free(excess[i], ZoneSize); 1900 1901 _vmem_free(z, ZoneSize); 1902 } else { 1903 zone_magazine_unlock(); 1904 } 1905 } 1906 1907 /* 1908 * _vmem_alloc() 1909 * 1910 * Directly map memory in PAGE_SIZE'd chunks with the specified 1911 * alignment. 1912 * 1913 * Alignment must be a multiple of PAGE_SIZE. 1914 * 1915 * Size must be >= alignment. 1916 */ 1917 static void * 1918 _vmem_alloc(size_t size, size_t align, int flags) 1919 { 1920 char *addr; 1921 char *save; 1922 size_t excess; 1923 1924 /* 1925 * Map anonymous private memory. 1926 */ 1927 addr = mmap(NULL, size, PROT_READ|PROT_WRITE, 1928 MAP_PRIVATE|MAP_ANON, -1, 0); 1929 if (addr == MAP_FAILED) 1930 return(NULL); 1931 1932 /* 1933 * Check alignment. The misaligned offset is also the excess 1934 * amount. If misaligned unmap the excess so we have a chance of 1935 * mapping at the next alignment point and recursively try again. 1936 * 1937 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB block alignment 1938 * aaaaaaaaa aaaaaaaaaaa aa mis-aligned allocation 1939 * xxxxxxxxx final excess calculation 1940 * ^ returned address 1941 */ 1942 excess = (uintptr_t)addr & (align - 1); 1943 1944 if (excess) { 1945 excess = align - excess; 1946 save = addr; 1947 1948 munmap(save + excess, size - excess); 1949 addr = _vmem_alloc(size, align, flags); 1950 munmap(save, excess); 1951 } 1952 return((void *)addr); 1953 } 1954 1955 /* 1956 * _vmem_free() 1957 * 1958 * Free a chunk of memory allocated with _vmem_alloc() 1959 */ 1960 static void 1961 _vmem_free(void *ptr, size_t size) 1962 { 1963 munmap(ptr, size); 1964 } 1965 1966 /* 1967 * Panic on fatal conditions 1968 */ 1969 static void 1970 _mpanic(const char *ctl, ...) 
1971 { 1972 va_list va; 1973 1974 if (malloc_panic == 0) { 1975 malloc_panic = 1; 1976 va_start(va, ctl); 1977 vfprintf(stderr, ctl, va); 1978 fprintf(stderr, "\n"); 1979 fflush(stderr); 1980 va_end(va); 1981 } 1982 abort(); 1983 } 1984 1985 __weak_reference(__aligned_alloc, aligned_alloc); 1986 __weak_reference(__malloc, malloc); 1987 __weak_reference(__calloc, calloc); 1988 __weak_reference(__posix_memalign, posix_memalign); 1989 __weak_reference(__realloc, realloc); 1990 __weak_reference(__free, free); 1991
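
/*
 * Illustrative usage sketch only -- not part of the allocator.  It is kept
 * under #if 0 so it is never compiled into libc; it merely demonstrates,
 * from the caller's side, the documented properties above (power-of-2
 * alignment, the non-NULL malloc(0) result, posix_memalign()).  How it is
 * built and run (e.g. a standalone test preloading the shared object
 * produced by the cc command near the top of this file) is an assumption
 * left to the reader.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

int
main(void)
{
	void *p, *q;
	int rc;

	/* power-of-2 sized allocations up to a page are power-of-2 aligned */
	p = malloc(256);
	assert(p != NULL && ((uintptr_t)p & 255) == 0);
	free(p);

	/* malloc(0) returns a usable, non-NULL pointer */
	p = malloc(0);
	assert(p != NULL);
	q = realloc(p, 100);
	assert(q != NULL);
	free(q);

	/* posix_memalign honors power-of-2 alignments >= sizeof(void *) */
	rc = posix_memalign(&p, 4096, 20000);
	assert(rc == 0 && ((uintptr_t)p & 4095) == 0);
	free(p);

	return 0;
}
#endif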