/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * is nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *      Alloc Size      Chunking        Number of zones
 *      0-127           8               16
 *      128-255         16              8
 *      256-511         32              8
 *      512-1023        64              8
 *      1024-2047       128             8
 *      2048-4095       256             8
 *      4096-8191       512             8
 *      8192-16383      1024            8
 *      16384-32767     2048            8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them for free().  One- and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *      Above that, power-of-2 sized allocations are page-aligned.  Non
 *      power-of-2 sized allocations are aligned the same as the chunk
 *      size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *      limitations of the zone chunking.
 */
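
/*
 * Example (illustrative sketch, not part of the original source and not
 * compiled): how the API behaviors documented above look from the caller's
 * side.  The helper name nmalloc_api_sketch() is hypothetical.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

static void
nmalloc_api_sketch(void)
{
        void *p;

        /* malloc(0) returns a special non-NULL token that free() accepts */
        p = malloc(0);
        assert(p != NULL);
        free(p);

        /* power-of-2 sized allocations up to a page are power-of-2 aligned */
        p = malloc(256);
        assert(((uintptr_t)p & (256 - 1)) == 0);
        free(p);

        /* a 100-byte request is served from the 8-byte-chunked zone (104) */
        p = malloc(100);
        free(p);
}
#endif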

#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>		/* SIZE_MAX, for the calloc() overflow check */
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>

#include "spinlock.h"
#include "un-namespace.h"

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
        struct bigalloc *next;  /* hash link */
        void    *base;          /* base pointer */
        u_long  bytes;          /* bytes allocated */
        u_long  unused01;
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT       (16 * 1024)     /* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE    (32 * 1024)     /* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE    (128 * 1024)    /* maximum zone size */
#define ZALLOC_ZONE_SIZE        (64 * 1024)
#define ZALLOC_SLAB_MAGIC       0x736c6162      /* magic sanity */
#define ZALLOC_SLAB_SLIDE       20              /* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES                  72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES                  80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
        struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
        __int32_t z_Magic;      /* magic number for sanity check */
        int     z_NFree;        /* total free chunks / ualloc space */
        struct slzone *z_Next;  /* ZoneAry[] link if z_NFree non-zero */
        struct slglobaldata *z_GlobalData;
        int     z_NMax;         /* maximum free chunks */
        char    *z_BasePtr;     /* pointer to start of chunk array */
        int     z_UIndex;       /* current initial allocation index */
        int     z_UEndIndex;    /* last (first) allocation index */
        int     z_ChunkSize;    /* chunk size for validation */
        int     z_FirstFreePg;  /* chunk list on a page-by-page basis */
        int     z_ZoneIndex;
        int     z_Flags;
        struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
#if defined(INVARIANTS)
        __uint32_t z_Bitmap[];  /* bitmap of free chunks / sanity */
#endif
} *slzone_t;

typedef struct slglobaldata {
        spinlock_t Spinlock;
        slzone_t ZoneAry[NZONES];/* linked list of zones NFree > 0 */
        slzone_t FreeZones;     /* whole zones that have become free */
        int     NFreeZones;     /* free zone count */
        int     JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD          0x0001

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE          8               /* in bytes */
#define MIN_CHUNK_MASK          (MIN_CHUNK_SIZE - 1)
#define ZONE_RELS_THRESH        4               /* threshold number of zones */
#define IN_SAME_PAGE_MASK       (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)
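
/*
 * Example (illustrative sketch, not compiled): because each zone is a
 * ZALLOC_ZONE_SIZE-aligned region with the slzone header IN-BAND at its
 * start, a chunk pointer can be mapped back to its zone header and to its
 * page index with simple masking, as _slabfree() does further below.  The
 * helper name is hypothetical.
 */
#if 0
static slzone_t
zone_of_sketch(void *ptr, int *pgnop)
{
        slzone_t z;

        /* strip the offset within the 64KB zone to find the in-band header */
        z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)(ZALLOC_ZONE_SIZE - 1));

        /* page index within the zone, used to index z_PageAry[] */
        *pgnop = (int)(((char *)ptr - (char *)z) >> PAGE_SHIFT);
        return(z);
}
#endif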

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 */
#define WEIRD_ADDR      0xdeadc0de
#define MAX_COPY        sizeof(weirdary)
#define ZERO_LENGTH_PTR ((void *)-8)

#define BIGHSHIFT       10                      /* bigalloc hash table */
#define BIGHSIZE        (1 << BIGHSHIFT)
#define BIGHMASK        (BIGHSIZE - 1)
#define BIGXSIZE        (BIGHSIZE / 16)         /* bigalloc lock table */
#define BIGXMASK        (BIGXSIZE - 1)

#define SLGD_MAX        4                       /* parallel allocations */

#define SAFLAG_ZERO     0x0001
#define SAFLAG_PASSIVE  0x0002

/*
 * Thread control
 */

#define arysize(ary)    (sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)    do { if (__predict_false(!(exp)))       \
                                _mpanic("assertion: %s in %s",  \
                                        #exp, __func__);        \
                            } while (0)

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static struct slglobaldata SLGlobalData[SLGD_MAX];
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static int malloc_panic;

static const int32_t weirdary[16] = {
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static __thread slglobaldata_t LastSLGD = &SLGlobalData[0];

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void _mpanic(const char *ctl, ...);
#if defined(INVARIANTS)
static void chunk_mark_allocated(slzone_t z, void *chunk);
static void chunk_mark_free(slzone_t z, void *chunk);
#endif

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

/*
 * Thread locks.
 *
 * NOTE: slgd_trylock() returns 0 or EBUSY
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
        if (__isthreaded)
                _SPINLOCK(&slgd->Spinlock);
}

static __inline int
slgd_trylock(slglobaldata_t slgd)
{
        if (__isthreaded)
                return(_SPINTRYLOCK(&slgd->Spinlock));
        return(0);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
        if (__isthreaded)
                _SPINUNLOCK(&slgd->Spinlock);
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
        int hv;

        hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
             ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

        return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
        int hv = _bigalloc_hash(ptr);
        bigalloc_t *bigp;

        bigp = &bigalloc_array[hv & BIGHMASK];
        if (__isthreaded)
                _SPINLOCK(&bigspin_array[hv & BIGXMASK]);
        return(bigp);
}
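
/*
 * Example (illustrative sketch, not compiled): the bigalloc bookkeeping
 * uses BIGHSIZE hash chains but only BIGXSIZE spinlocks, so each lock
 * covers a stripe of 16 chains.  The helper below is hypothetical and just
 * spells out the index math used by bigalloc_lock() above.
 */
#if 0
static void
bigalloc_striping_sketch(void *ptr)
{
        int hv = _bigalloc_hash(ptr);
        int chain = hv & BIGHMASK;      /* which of the 1024 hash chains */
        int lock = hv & BIGXMASK;       /* which of the 64 spinlocks */

        /* all pointers hashing into the same stripe share one spinlock */
        (void)chain;
        (void)lock;
}
#endif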

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
        int hv = _bigalloc_hash(ptr);
        bigalloc_t *bigp;

        bigp = &bigalloc_array[hv & BIGHMASK];
        if (*bigp == NULL)
                return(NULL);
        if (__isthreaded) {
                _SPINLOCK(&bigspin_array[hv & BIGXMASK]);
        }
        return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
        int hv;

        if (__isthreaded) {
                hv = _bigalloc_hash(ptr);
                _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
        }
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
        size_t n = (unsigned int)*bytes;        /* unsigned for shift opt */

        if (n < 128) {
                *bytes = n = (n + 7) & ~7;
                *chunking = 8;
                return(n / 8 - 1);              /* 8 byte chunks, 16 zones */
        }
        if (n < 256) {
                *bytes = n = (n + 15) & ~15;
                *chunking = 16;
                return(n / 16 + 7);
        }
        if (n < 8192) {
                if (n < 512) {
                        *bytes = n = (n + 31) & ~31;
                        *chunking = 32;
                        return(n / 32 + 15);
                }
                if (n < 1024) {
                        *bytes = n = (n + 63) & ~63;
                        *chunking = 64;
                        return(n / 64 + 23);
                }
                if (n < 2048) {
                        *bytes = n = (n + 127) & ~127;
                        *chunking = 128;
                        return(n / 128 + 31);
                }
                if (n < 4096) {
                        *bytes = n = (n + 255) & ~255;
                        *chunking = 256;
                        return(n / 256 + 39);
                }
                *bytes = n = (n + 511) & ~511;
                *chunking = 512;
                return(n / 512 + 47);
        }
#if ZALLOC_ZONE_LIMIT > 8192
        if (n < 16384) {
                *bytes = n = (n + 1023) & ~1023;
                *chunking = 1024;
                return(n / 1024 + 55);
        }
#endif
#if ZALLOC_ZONE_LIMIT > 16384
        if (n < 32768) {
                *bytes = n = (n + 2047) & ~2047;
                *chunking = 2048;
                return(n / 2048 + 63);
        }
#endif
        _mpanic("Unexpected byte count %zu", n);
        return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
        void *ptr;

        ptr = _slaballoc(size, 0);
        if (ptr == NULL)
                errno = ENOMEM;
        return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
        void *ptr;

        /*
         * Guard against number * size overflowing, which would otherwise
         * silently return a buffer smaller than the caller expects.
         */
        if (size != 0 && number > SIZE_MAX / size) {
                errno = ENOMEM;
                return(NULL);
        }

        ptr = _slaballoc(number * size, SAFLAG_ZERO);
        if (ptr == NULL)
                errno = ENOMEM;
        return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
        ptr = _slabrealloc(ptr, size);
        if (ptr == NULL)
                errno = ENOMEM;
        return(ptr);
}
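
/*
 * Example (illustrative sketch, not compiled): a few concrete
 * size -> (chunk size, zone index) mappings produced by zoneindex(),
 * matching the chunking table at the top of this file.
 */
#if 0
static void
zoneindex_sketch(void)
{
        size_t bytes, chunking;
        int zi;

        bytes = 100;                    /* 0-127 band, 8 byte chunking */
        zi = zoneindex(&bytes, &chunking);
        /* bytes == 104, chunking == 8, zi == 12 */

        bytes = 3000;                   /* 2048-4095 band, 256 byte chunking */
        zi = zoneindex(&bytes, &chunking);
        /* bytes == 3072, chunking == 256, zi == 51 */

        bytes = 16383;                  /* top slab-managed band */
        zi = zoneindex(&bytes, &chunking);
        /* bytes == 16384, chunking == 1024, zi == 71 (NZONES - 1) */
}
#endif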

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
        bigalloc_t *bigp;
        bigalloc_t big;
        size_t chunking;
        int zi;

        /*
         * OpenGroup spec issue 6 checks
         */
        if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
                *memptr = NULL;
                return(EINVAL);
        }
        if (alignment < sizeof(void *)) {
                *memptr = NULL;
                return(EINVAL);
        }

        /*
         * Locate a zone matching the requirements.
         */
        if (size < alignment)
                size = alignment;
        while (size < PAGE_SIZE) {
                zi = zoneindex(&size, &chunking);
                if (chunking >= alignment) {
                        *memptr = _slaballoc(size, 0);
                        return(*memptr ? 0 : ENOMEM);
                }
                size <<= 1;
        }

        /*
         * If the slab allocator cannot handle it, use _vmem_alloc().
         *
         * Alignment must be adjusted up to at least PAGE_SIZE in this case.
         */
        if (alignment < PAGE_SIZE)
                alignment = PAGE_SIZE;
        if (size < alignment)
                size = alignment;
        size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
        *memptr = _vmem_alloc(size, alignment, 0);
        if (*memptr == NULL)
                return(ENOMEM);

        big = _slaballoc(sizeof(struct bigalloc), 0);
        if (big == NULL) {
                _vmem_free(*memptr, size);
                *memptr = NULL;
                return(ENOMEM);
        }
        bigp = bigalloc_lock(*memptr);
        big->base = *memptr;
        big->bytes = size;
        big->unused01 = 0;
        big->next = *bigp;
        *bigp = big;
        bigalloc_unlock(*memptr);

        return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
        _slabfree(ptr);
}
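
/*
 * Example (illustrative sketch, not compiled): typical posix_memalign()
 * usage against the rules enforced above -- the alignment must be a power
 * of 2 and at least sizeof(void *).
 */
#if 0
#include <errno.h>
#include <stdlib.h>

static void
posix_memalign_sketch(void)
{
        void *p;
        int error;

        /* small request: satisfied from a zone whose chunking >= 64 */
        error = posix_memalign(&p, 64, 50);
        if (error == 0)
                free(p);

        /* alignment of 3 is not a power of 2: EINVAL, p is set to NULL */
        error = posix_memalign(&p, 3, 50);
        /* error == EINVAL */

        /* large alignment falls through to _vmem_alloc() */
        error = posix_memalign(&p, 65536, 65536);
        if (error == 0)
                free(p);
}
#endif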

/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.  If the request is too large,
 * or if it is page-aligned beyond a certain size, we fall back to the
 * KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
        slzone_t z;
        slchunk_t chunk;
        slglobaldata_t slgd;
        size_t chunking;
        int zi;
#ifdef INVARIANTS
        int i;
#endif
        int off;

        /*
         * Handle the degenerate size == 0 case.  Yes, this does happen.
         * Return a special pointer.  This is to maintain compatibility with
         * the original malloc implementation.  Certain devices, such as the
         * adaptec driver, not only allocate 0 bytes, they check for NULL and
         * also realloc() later on.  Joy.
         */
        if (size == 0)
                return(ZERO_LENGTH_PTR);

        /*
         * Handle large allocations directly.  There should not be very many
         * of these so performance is not a big issue.
         *
         * The backend allocator is pretty nasty on a SMP system.  Use the
         * slab allocator for one and two page-sized chunks even though we
         * lose some efficiency.
         */
        if (size >= ZoneLimit ||
            ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
                bigalloc_t big;
                bigalloc_t *bigp;

                size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
                chunk = _vmem_alloc(size, PAGE_SIZE, flags);
                if (chunk == NULL)
                        return(NULL);

                big = _slaballoc(sizeof(struct bigalloc), 0);
                if (big == NULL) {
                        _vmem_free(chunk, size);
                        return(NULL);
                }
                bigp = bigalloc_lock(chunk);
                big->base = chunk;
                big->bytes = size;
                big->unused01 = 0;
                big->next = *bigp;
                *bigp = big;
                bigalloc_unlock(chunk);

                return(chunk);
        }

        /*
         * Multi-threading support.  This needs work XXX.
         *
         * Choose a globaldata structure to allocate from.  If we cannot
         * immediately get the lock try a different one.
         *
         * LastSLGD is a per-thread global.
         */
        slgd = LastSLGD;
        if (slgd_trylock(slgd) != 0) {
                if (++slgd == &SLGlobalData[SLGD_MAX])
                        slgd = &SLGlobalData[0];
                LastSLGD = slgd;
                slgd_lock(slgd);
        }

        /*
         * Attempt to allocate out of an existing zone.  If all zones are
         * exhausted pull one off the free list or allocate a new one.
         *
         * Note: zoneindex() will panic if size is too large.
         */
        zi = zoneindex(&size, &chunking);
        MASSERT(zi < NZONES);

        if ((z = slgd->ZoneAry[zi]) == NULL) {
                /*
                 * Pull the zone off the free list.  If the zone on
                 * the free list happens to be correctly set up we
                 * do not have to reinitialize it.
                 */
                if ((z = slgd->FreeZones) != NULL) {
                        slgd->FreeZones = z->z_Next;
                        --slgd->NFreeZones;
                        if (z->z_ChunkSize == size) {
                                z->z_Magic = ZALLOC_SLAB_MAGIC;
                                z->z_Next = slgd->ZoneAry[zi];
                                slgd->ZoneAry[zi] = z;
                                goto have_zone;
                        }
                        bzero(z, sizeof(struct slzone));
                        z->z_Flags |= SLZF_UNOTZEROD;
                } else {
                        z = _vmem_alloc(ZoneSize, ZoneSize, flags);
                        if (z == NULL)
                                goto fail;
                }

                /*
                 * How big is the base structure?
                 */
#if defined(INVARIANTS)
                /*
                 * Make room for z_Bitmap.  An exact calculation is
                 * somewhat more complicated so don't make an exact
                 * calculation.
                 */
                off = offsetof(struct slzone,
                               z_Bitmap[(ZoneSize / size + 31) / 32]);
                bzero(z->z_Bitmap, (ZoneSize / size + 31) / 8);
#else
                off = sizeof(struct slzone);
#endif

                /*
                 * Align the storage in the zone based on the chunking.
                 *
                 * Guarantee power-of-2 alignment for power-of-2-sized
                 * chunks.  Otherwise align based on the chunking size
                 * (typically 8 or 16 bytes for small allocations).
                 *
                 * NOTE: Allocations >= ZoneLimit are governed by the
                 * bigalloc code and typically only guarantee page-alignment.
                 *
                 * Set initial conditions for UIndex near the zone header
                 * to reduce unnecessary page faults, vs semi-randomization
                 * to improve L1 cache saturation.
                 */
                if ((size | (size - 1)) + 1 == (size << 1))
                        off = (off + size - 1) & ~(size - 1);
                else
                        off = (off + chunking - 1) & ~(chunking - 1);
                z->z_Magic = ZALLOC_SLAB_MAGIC;
                z->z_GlobalData = slgd;
                z->z_ZoneIndex = zi;
                z->z_NMax = (ZoneSize - off) / size;
                z->z_NFree = z->z_NMax;
                z->z_BasePtr = (char *)z + off;
                /*z->z_UIndex = z->z_UEndIndex = slgd->JunkIndex % z->z_NMax;*/
                z->z_UIndex = z->z_UEndIndex = 0;
                z->z_ChunkSize = size;
                z->z_FirstFreePg = ZonePageCount;
                z->z_Next = slgd->ZoneAry[zi];
                slgd->ZoneAry[zi] = z;
                if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
                        flags &= ~SAFLAG_ZERO;  /* already zero'd */
                        flags |= SAFLAG_PASSIVE;
                }

                /*
                 * Slide the base index for initial allocations out of the
                 * next zone we create so we do not over-weight the lower
                 * part of the cpu memory caches.
                 */
                slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
                                & (ZALLOC_MAX_ZONE_SIZE - 1);
        }

        /*
         * Ok, we have a zone from which at least one chunk is available.
         *
         * Remove us from the ZoneAry[] when we become empty
         */
have_zone:
        MASSERT(z->z_NFree > 0);

        if (--z->z_NFree == 0) {
                slgd->ZoneAry[zi] = z->z_Next;
                z->z_Next = NULL;
        }

        /*
         * Locate a chunk in a free page.  This attempts to localize
         * reallocations into earlier pages without us having to sort
         * the chunk list.  A chunk may still overlap a page boundary.
         */
        while (z->z_FirstFreePg < ZonePageCount) {
                if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
                        /*
                         * Diagnostic: c_Next is not total garbage.
                         */
                        MASSERT(chunk->c_Next == NULL ||
                            ((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
                            ((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
#ifdef INVARIANTS
                        chunk_mark_allocated(z, chunk);
#endif
                        MASSERT((uintptr_t)chunk & ZoneMask);
                        z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
                        goto done;
                }
                ++z->z_FirstFreePg;
        }

        /*
         * No chunks are available but NFree said we had some memory,
         * so it must be available in the never-before-used-memory
         * area governed by UIndex.  The consequences are very
         * serious if our zone got corrupted so we use an explicit
         * panic rather than a KASSERT.
         */
        chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

        if (++z->z_UIndex == z->z_NMax)
                z->z_UIndex = 0;
        if (z->z_UIndex == z->z_UEndIndex) {
                if (z->z_NFree != 0)
                        _mpanic("slaballoc: corrupted zone");
        }

        if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
                flags &= ~SAFLAG_ZERO;
                flags |= SAFLAG_PASSIVE;
        }
#if defined(INVARIANTS)
        chunk_mark_allocated(z, chunk);
#endif

done:
        slgd_unlock(slgd);
        if (flags & SAFLAG_ZERO) {
                bzero(chunk, size);
#ifdef INVARIANTS
        } else if ((flags & (SAFLAG_ZERO|SAFLAG_PASSIVE)) == 0) {
                if (use_malloc_pattern) {
                        for (i = 0; i < size; i += sizeof(int)) {
                                *(int *)((char *)chunk + i) = -1;
                        }
                }
                /* avoid accidental double-free check */
                chunk->c_Next = (void *)-1;
#endif
        }
        return(chunk);
fail:
        slgd_unlock(slgd);
        return(NULL);
}
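
/*
 * Example (illustrative sketch, not compiled): which path a request takes
 * through _slaballoc() above.  The helper name is hypothetical; it simply
 * mirrors the dispatch checks at the top of that function.
 */
#if 0
static const char *
_slaballoc_path_sketch(size_t size)
{
        if (size == 0)
                return("ZERO_LENGTH_PTR");
        if (size >= ZALLOC_ZONE_LIMIT ||
            ((size & PAGE_MASK) == 0 && size > PAGE_SIZE * 2)) {
                /* mmap-backed, tracked in the bigalloc hash table */
                return("bigalloc");
        }
        /* served from the per-size zone selected by zoneindex() */
        return("zone");
}
#endif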

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
        bigalloc_t *bigp;
        void *nptr;
        slzone_t z;
        size_t chunking;

        if (ptr == NULL || ptr == ZERO_LENGTH_PTR)
                return(_slaballoc(size, 0));

        if (size == 0) {
                free(ptr);
                return(ZERO_LENGTH_PTR);
        }

        /*
         * Handle oversized allocations.  XXX we really should require
         * that a size be passed to free() instead of this nonsense.
         */
        if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
                bigalloc_t big;
                size_t bigbytes;

                while ((big = *bigp) != NULL) {
                        if (big->base == ptr) {
                                size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
                                bigbytes = big->bytes;
                                bigalloc_unlock(ptr);
                                if (bigbytes == size)
                                        return(ptr);
                                if ((nptr = _slaballoc(size, 0)) == NULL)
                                        return(NULL);
                                if (size > bigbytes)
                                        size = bigbytes;
                                bcopy(ptr, nptr, size);
                                _slabfree(ptr);
                                return(nptr);
                        }
                        bigp = &big->next;
                }
                bigalloc_unlock(ptr);
        }

        /*
         * Get the original allocation's zone.  If the new request winds
         * up using the same chunk size we do not have to do anything.
         *
         * NOTE: We don't have to lock the globaldata here, the fields we
         * access here will not change at least as long as we have control
         * over the allocation.
         */
        z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
        MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

        /*
         * Use zoneindex() to chunk-align the new size, as long as the
         * new size is not too large.
         */
        if (size < ZoneLimit) {
                zoneindex(&size, &chunking);
                if (z->z_ChunkSize == size)
                        return(ptr);
        }

        /*
         * Allocate memory for the new request size and copy as appropriate.
         */
        if ((nptr = _slaballoc(size, 0)) != NULL) {
                if (size > z->z_ChunkSize)
                        size = z->z_ChunkSize;
                bcopy(ptr, nptr, size);
                _slabfree(ptr);
        }

        return(nptr);
}
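
/*
 * Example (illustrative sketch, not compiled): the pointer-reuse behavior
 * of _slabrealloc() above.  Growing within the same chunk size returns the
 * original pointer; crossing a chunking boundary allocates and copies.
 */
#if 0
#include <assert.h>
#include <stdlib.h>

static void
realloc_reuse_sketch(void)
{
        void *p, *q;

        p = malloc(100);                /* 104-byte chunk (8-byte chunking) */
        q = realloc(p, 104);            /* still fits the 104-byte chunk */
        assert(q == p);

        q = realloc(q, 200);            /* needs a 208-byte chunk: may move */
        free(q);
}
#endif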

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr)
{
        slzone_t z;
        slchunk_t chunk;
        bigalloc_t big;
        bigalloc_t *bigp;
        slglobaldata_t slgd;
        size_t size;
        int pgno;

        /*
         * Handle NULL frees and special 0-byte allocations
         */
        if (ptr == NULL)
                return;
        if (ptr == ZERO_LENGTH_PTR)
                return;

        /*
         * Handle oversized allocations.
         */
        if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
                while ((big = *bigp) != NULL) {
                        if (big->base == ptr) {
                                *bigp = big->next;
                                bigalloc_unlock(ptr);
                                size = big->bytes;
                                _slabfree(big);
#ifdef INVARIANTS
                                MASSERT(sizeof(weirdary) <= size);
                                bcopy(weirdary, ptr, sizeof(weirdary));
#endif
                                _vmem_free(ptr, size);
                                return;
                        }
                        bigp = &big->next;
                }
                bigalloc_unlock(ptr);
        }

        /*
         * Zone case.  Figure out the zone based on the fact that it is
         * ZoneSize aligned.
         */
        z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
        MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

        pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
        chunk = ptr;
        slgd = z->z_GlobalData;
        slgd_lock(slgd);

#ifdef INVARIANTS
        /*
         * Attempt to detect a double-free.  To reduce overhead we only check
         * if there appears to be a link pointer at the base of the data.
         */
        if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
                slchunk_t scan;

                for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
                        if (scan == chunk)
                                _mpanic("Double free at %p", chunk);
                }
        }
        chunk_mark_free(z, chunk);
#endif

        /*
         * Put weird data into the memory to detect modifications after
         * freeing, illegal pointer use after freeing (we should fault on
         * the odd address), and so forth.
         */
#ifdef INVARIANTS
        if (z->z_ChunkSize < sizeof(weirdary))
                bcopy(weirdary, chunk, z->z_ChunkSize);
        else
                bcopy(weirdary, chunk, sizeof(weirdary));
#endif

        /*
         * Add this free non-zero'd chunk to a linked list for reuse, adjust
         * z_FirstFreePg.
         */
        chunk->c_Next = z->z_PageAry[pgno];
        z->z_PageAry[pgno] = chunk;
        if (z->z_FirstFreePg > pgno)
                z->z_FirstFreePg = pgno;

        /*
         * Bump the number of free chunks.  If it becomes non-zero the zone
         * must be added back onto the appropriate list.
         */
        if (z->z_NFree++ == 0) {
                z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
                slgd->ZoneAry[z->z_ZoneIndex] = z;
        }

        /*
         * If the zone becomes totally free then move this zone to
         * the FreeZones list.
         *
         * Do not madvise here, avoiding the edge case where a malloc/free
         * loop is sitting on the edge of a new zone.
         *
         * We could leave at least one zone in the ZoneAry for the index,
         * using something like the below, but while this might be fine
         * for the kernel (who cares about ~10MB of wasted memory), it
         * probably isn't such a good idea for a user program.
         *
         *      && (z->z_Next || slgd->ZoneAry[z->z_ZoneIndex] != z)
         */
        if (z->z_NFree == z->z_NMax) {
                slzone_t *pz;

                pz = &slgd->ZoneAry[z->z_ZoneIndex];
                while (z != *pz)
                        pz = &(*pz)->z_Next;
                *pz = z->z_Next;
                z->z_Magic = -1;
                z->z_Next = slgd->FreeZones;
                slgd->FreeZones = z;
                ++slgd->NFreeZones;
        }

        /*
         * Limit the number of zones we keep cached.
         */
        while (slgd->NFreeZones > ZONE_RELS_THRESH) {
                z = slgd->FreeZones;
                slgd->FreeZones = z->z_Next;
                --slgd->NFreeZones;
                slgd_unlock(slgd);
                _vmem_free(z, ZoneSize);
                slgd_lock(slgd);
        }
        slgd_unlock(slgd);
}

#if defined(INVARIANTS)
/*
 * Helper routines for sanity checks
 */
static
void
chunk_mark_allocated(slzone_t z, void *chunk)
{
        int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
        __uint32_t *bitptr;

        MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
        bitptr = &z->z_Bitmap[bitdex >> 5];
        bitdex &= 31;
        MASSERT((*bitptr & (1 << bitdex)) == 0);
        *bitptr |= 1 << bitdex;
}

static
void
chunk_mark_free(slzone_t z, void *chunk)
{
        int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
        __uint32_t *bitptr;

        MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
        bitptr = &z->z_Bitmap[bitdex >> 5];
        bitdex &= 31;
        MASSERT((*bitptr & (1 << bitdex)) != 0);
        *bitptr &= ~(1 << bitdex);
}

#endif

/*
 * _vmem_alloc()
 *
 * Directly map memory in PAGE_SIZE'd chunks with the specified
 * alignment.
 *
 * Alignment must be a multiple of PAGE_SIZE.
 *
 * Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
        char *addr;
        char *save;
        size_t excess;

        /*
         * Map anonymous private memory.
         */
        addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
                    MAP_PRIVATE|MAP_ANON, -1, 0);
        if (addr == MAP_FAILED)
                return(NULL);

        /*
         * Check alignment.  The misaligned offset is also the excess
         * amount.  If misaligned unmap the excess so we have a chance of
         * mapping at the next alignment point and recursively try again.
         *
         *   BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB      block alignment
         *     aaaaaaaaa aaaaaaaaaaa aa               mis-aligned allocation
         *     xxxxxxxxx                              final excess calculation
         *     ^ returned address
         */
        excess = (uintptr_t)addr & (align - 1);

        if (excess) {
                excess = align - excess;
                save = addr;

                munmap(save + excess, size - excess);
                addr = _vmem_alloc(size, align, flags);
                munmap(save, excess);
        }
        return((void *)addr);
}
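
/*
 * Example (illustrative sketch, not compiled): the excess calculation used
 * by _vmem_alloc() above for a hypothetical 64KB-aligned request that mmap()
 * happened to place at a page-aligned but not 64KB-aligned address.
 */
#if 0
static void
vmem_align_sketch(void)
{
        size_t align = 65536;
        uintptr_t addr = 0x7f0000012000;        /* hypothetical mmap() result */
        size_t excess;

        excess = addr & (align - 1);    /* 0x2000: misaligned offset */
        excess = align - excess;        /* 0xe000: distance to next boundary */

        /*
         * _vmem_alloc() munmap()s everything from addr + 0xe000 on, keeps
         * [addr, addr + 0xe000) mapped as a placeholder while it retries
         * the mapping, then munmap()s the placeholder as well.
         */
}
#endif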

/*
 * _vmem_free()
 *
 * Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
        munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
        va_list va;

        if (malloc_panic == 0) {
                malloc_panic = 1;
                va_start(va, ctl);
                vfprintf(stderr, ctl, va);
                fprintf(stderr, "\n");
                fflush(stderr);
                va_end(va);
        }
        abort();
}
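
/*
 * Example (illustrative sketch, not compiled): a minimal smoke test that
 * exercises the three allocation classes handled above -- small zone chunks,
 * one/two page allocations routed through zones, and mmap-backed bigallocs.
 * The test function and the sizes chosen are hypothetical.
 */
#if 0
#include <stdlib.h>
#include <string.h>

static void
nmalloc_smoke_test(void)
{
        void *small = malloc(24);       /* zone chunk */
        void *pages = malloc(8192);     /* two 4KB pages: still zone-backed */
        void *huge = malloc(1 << 20);   /* >= ZoneLimit: bigalloc via mmap */

        if (small && pages && huge) {
                memset(small, 0, 24);
                memset(pages, 0, 8192);
                memset(huge, 0, 1 << 20);
        }
        free(huge);
        free(pages);
        free(small);

        huge = realloc(NULL, 64);       /* realloc(NULL, n) behaves as malloc(n) */
        free(huge);
}
#endif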