1 /* $OpenBSD: kern_malloc.c,v 1.75 2008/09/29 12:34:18 art Exp $ */ 2 3 /* 4 * Copyright (c) 2008 Michael Shalayeff 5 * Copyright (c) 1987, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/proc.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/systm.h> 40 #include <sys/sysctl.h> 41 #include <sys/time.h> 42 #include <sys/pool.h> 43 #include <sys/rwlock.h> 44 45 #include <uvm/uvm_extern.h> 46 47 static struct vm_map kmem_map_store; 48 struct vm_map *kmem_map = NULL; 49 50 #ifdef NKMEMCLUSTERS 51 #error NKMEMCLUSTERS is obsolete; remove it from your kernel config file and use NKMEMPAGES instead or let the kernel auto-size 52 #endif 53 54 /* 55 * Default number of pages in kmem_map. We attempt to calculate this 56 * at run-time, but allow it to be either patched or set in the kernel 57 * config file. 58 */ 59 #ifndef NKMEMPAGES 60 #define NKMEMPAGES 0 61 #endif 62 u_int nkmempages = NKMEMPAGES; 63 64 /* 65 * Defaults for lower- and upper-bounds for the kmem_map page count. 66 * Can be overridden by kernel config options. 67 */ 68 #ifndef NKMEMPAGES_MIN 69 #define NKMEMPAGES_MIN NKMEMPAGES_MIN_DEFAULT 70 #endif 71 u_int nkmempages_min = 0; 72 73 #ifndef NKMEMPAGES_MAX 74 #define NKMEMPAGES_MAX NKMEMPAGES_MAX_DEFAULT 75 #endif 76 u_int nkmempages_max = 0; 77 78 struct pool mallocpl[MINBUCKET + 16]; 79 char mallocplnames[MINBUCKET + 16][8]; /* wchan for pool */ 80 char mallocplwarn[MINBUCKET + 16][32]; /* warning message for hard limit */ 81 82 struct kmembuckets bucket[MINBUCKET + 16]; 83 struct kmemstats kmemstats[M_LAST]; 84 struct kmemusage *kmemusage; 85 char *kmembase, *kmemlimit; 86 char buckstring[16 * sizeof("123456,")]; 87 int buckstring_init = 0; 88 #if defined(KMEMSTATS) || defined(DIAGNOSTIC) || defined(FFS_SOFTUPDATES) 89 char *memname[] = INITKMEMNAMES; 90 char *memall = NULL; 91 struct rwlock sysctl_kmemlock = RWLOCK_INITIALIZER("sysctlklk"); 92 #endif 93 94 #ifdef DIAGNOSTIC 95 /* 96 * The WEIRD_ADDR is used as known text to copy into free objects so 97 * that modifications after frees can be detected. 98 */ 99 #ifdef DEADBEEF0 100 #define WEIRD_ADDR ((unsigned) DEADBEEF0) 101 #else 102 #define WEIRD_ADDR ((unsigned) 0xdeadbeef) 103 #endif 104 #define MAX_COPY 32 105 106 /* 107 * Normally the freelist structure is used only to hold the list pointer 108 * for free objects. However, when running with diagnostics, the first 109 * 8 bytes of the structure is unused except for diagnostic information, 110 * and the free list pointer is at offset 8 in the structure. Since the 111 * first 8 bytes is the portion of the structure most often modified, this 112 * helps to detect memory reuse problems and avoid free list corruption. 113 */ 114 struct freelist { 115 int32_t spare0; 116 int16_t type; 117 int16_t spare1; 118 caddr_t next; 119 }; 120 #else /* !DIAGNOSTIC */ 121 struct freelist { 122 caddr_t next; 123 }; 124 #endif /* DIAGNOSTIC */ 125 126 #ifndef SMALL_KERNEL 127 struct timeval malloc_errintvl = { 5, 0 }; 128 struct timeval malloc_lasterr; 129 #endif 130 131 void *malloc_page_alloc(struct pool *, int); 132 void malloc_page_free(struct pool *, void *); 133 struct pool_allocator pool_allocator_malloc = { 134 malloc_page_alloc, malloc_page_free, 0, 135 }; 136 137 void * 138 malloc_page_alloc(struct pool *pp, int flags) 139 { 140 void *v = uvm_km_getpage(flags & M_NOWAIT? 0 : 1); 141 struct vm_page *pg; 142 paddr_t pa; 143 144 if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa)) 145 panic("malloc_page_alloc: pmap_extract failed"); 146 147 pg = PHYS_TO_VM_PAGE(pa); 148 if (pg == NULL) 149 panic("malloc_page_alloc: no page"); 150 pg->wire_count = BUCKETINDX(pp->pr_size); 151 152 return v; 153 } 154 155 void 156 malloc_page_free(struct pool *pp, void *v) 157 { 158 struct vm_page *pg; 159 paddr_t pa; 160 161 if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa)) 162 panic("malloc_page_free: pmap_extract failed"); 163 164 pg = PHYS_TO_VM_PAGE(pa); 165 if (pg == NULL) 166 panic("malloc_page_free: no page"); 167 pg->wire_count = 1; 168 uvm_km_putpage(v); 169 } 170 171 /* 172 * Allocate a block of memory 173 */ 174 void * 175 malloc(unsigned long size, int type, int flags) 176 { 177 struct kmembuckets *kbp; 178 struct kmemusage *kup; 179 vsize_t indx, allocsize; 180 int s; 181 void *va; 182 #ifdef KMEMSTATS 183 struct kmemstats *ksp = &kmemstats[type]; 184 185 if (((unsigned long)type) >= M_LAST) 186 panic("malloc - bogus type"); 187 #endif 188 189 #ifdef MALLOC_DEBUG 190 if (debug_malloc(size, type, flags, &va)) { 191 if ((flags & M_ZERO) && va != NULL) 192 memset(va, 0, size); 193 return (va); 194 } 195 #endif 196 197 if (size > 65535 * PAGE_SIZE) { 198 if (flags & M_CANFAIL) { 199 #ifndef SMALL_KERNEL 200 if (ratecheck(&malloc_lasterr, &malloc_errintvl)) 201 printf("malloc(): allocation too large, " 202 "type = %d, size = %lu\n", type, size); 203 #endif 204 return (NULL); 205 } else 206 panic("malloc: allocation too large"); 207 } 208 209 indx = BUCKETINDX(size); 210 kbp = &bucket[indx]; 211 s = splvm(); 212 #ifdef KMEMSTATS 213 while (ksp->ks_memuse >= ksp->ks_limit) { 214 if (flags & M_NOWAIT) { 215 splx(s); 216 return (NULL); 217 } 218 if (ksp->ks_limblocks < 65535) 219 ksp->ks_limblocks++; 220 tsleep(ksp, PSWP+2, memname[type], 0); 221 } 222 #endif 223 if (size > MAXALLOCSAVE) { 224 allocsize = round_page(size); 225 va = (void *) uvm_km_kmemalloc(kmem_map, NULL, allocsize, 226 ((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) | 227 ((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0)); 228 if (va == NULL) { 229 /* 230 * Kmem_malloc() can return NULL, even if it can 231 * wait, if there is no map space available, because 232 * it can't fix that problem. Neither can we, 233 * right now. (We should release pages which 234 * are completely free and which are in buckets 235 * with too many free elements.) 236 */ 237 if ((flags & (M_NOWAIT|M_CANFAIL)) == 0) 238 panic("malloc: out of space in kmem_map"); 239 splx(s); 240 return (NULL); 241 } 242 #ifdef KMEMSTATS 243 kbp->kb_total++; 244 kbp->kb_calls++; 245 #endif 246 kup = btokup(va); 247 kup->ku_indx = indx; 248 kup->ku_pagecnt = atop(allocsize); 249 } else { 250 allocsize = mallocpl[indx].pr_size; 251 va = pool_get(&mallocpl[indx], PR_LIMITFAIL | 252 (flags & M_NOWAIT ? 0 : PR_WAITOK)); 253 if (!va && (flags & (M_NOWAIT|M_CANFAIL)) == 0) 254 panic("malloc: out of space in kmem pool"); 255 } 256 257 #ifdef KMEMSTATS 258 if (va) { 259 ksp->ks_memuse += allocsize; 260 if (ksp->ks_memuse > ksp->ks_maxused) 261 ksp->ks_maxused = ksp->ks_memuse; 262 ksp->ks_size |= 1 << indx; 263 ksp->ks_inuse++; 264 ksp->ks_calls++; 265 } 266 #endif 267 splx(s); 268 269 if ((flags & M_ZERO) && va != NULL) 270 memset(va, 0, size); 271 272 return (va); 273 } 274 275 /* 276 * Free a block of memory allocated by malloc. 277 */ 278 void 279 free(void *addr, int type) 280 { 281 struct kmembuckets *kbp; 282 struct kmemusage *kup; 283 struct vm_page *pg; 284 paddr_t pa; 285 long size; 286 int s; 287 #ifdef KMEMSTATS 288 struct kmemstats *ksp = &kmemstats[type]; 289 #endif 290 291 #ifdef MALLOC_DEBUG 292 if (debug_free(addr, type)) 293 return; 294 #endif 295 296 s = splvm(); 297 if (addr >= (void *)kmembase && addr < (void *)kmemlimit) { 298 kup = btokup(addr); 299 kbp = &bucket[kup->ku_indx]; 300 size = ptoa(kup->ku_pagecnt); 301 #ifdef DIAGNOSTIC 302 if ((vaddr_t)addr != round_page((vaddr_t)addr)) 303 panic("free: unaligned addr %p, size %ld, type %s", 304 addr, size, memname[type]); 305 #endif /* DIAGNOSTIC */ 306 uvm_km_free(kmem_map, (vaddr_t)addr, size); 307 #ifdef KMEMSTATS 308 kup->ku_indx = 0; 309 kup->ku_pagecnt = 0; 310 kbp->kb_total--; 311 #endif 312 } else { 313 if (!pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa)) 314 panic("free: pmap_extract failed"); 315 pg = PHYS_TO_VM_PAGE(pa); 316 if (pg == NULL) 317 panic("free: no page"); 318 #ifdef DIAGNOSTIC 319 if (pg->pg_flags & PQ_FREE) 320 panic("free: page %p is free", pg); 321 if (pg->wire_count < MINBUCKET || 322 (1 << pg->wire_count) > MAXALLOCSAVE) 323 panic("free: invalid page bucket %d", pg->wire_count); 324 #endif 325 size = mallocpl[pg->wire_count].pr_size; 326 pool_put(&mallocpl[pg->wire_count], addr); 327 } 328 329 #ifdef KMEMSTATS 330 ksp->ks_inuse--; 331 ksp->ks_memuse -= size; 332 if (ksp->ks_memuse + size >= ksp->ks_limit && 333 ksp->ks_memuse < ksp->ks_limit) 334 wakeup(ksp); /* unnecessary for pool, whatever */ 335 #endif 336 337 splx(s); 338 } 339 340 /* 341 * Compute the number of pages that kmem_map will map, that is, 342 * the size of the kernel malloc arena. 343 */ 344 void 345 kmeminit_nkmempages(void) 346 { 347 u_int npages; 348 349 if (nkmempages != 0) { 350 /* 351 * It's already been set (by us being here before, or 352 * by patching or kernel config options), bail out now. 353 */ 354 return; 355 } 356 357 /* 358 * We can't initialize these variables at compilation time, since 359 * the page size may not be known (on sparc GENERIC kernels, for 360 * example). But we still want the MD code to be able to provide 361 * better values. 362 */ 363 if (nkmempages_min == 0) 364 nkmempages_min = NKMEMPAGES_MIN; 365 if (nkmempages_max == 0) 366 nkmempages_max = NKMEMPAGES_MAX; 367 368 /* 369 * We use the following (simple) formula: 370 * 371 * - Starting point is physical memory / 4. 372 * 373 * - Clamp it down to nkmempages_max. 374 * 375 * - Round it up to nkmempages_min. 376 */ 377 npages = physmem / 4; 378 379 if (npages > nkmempages_max) 380 npages = nkmempages_max; 381 382 if (npages < nkmempages_min) 383 npages = nkmempages_min; 384 385 nkmempages = npages; 386 } 387 388 /* 389 * Initialize the kernel memory allocator 390 */ 391 void 392 kmeminit(void) 393 { 394 vaddr_t base, limit; 395 int i; 396 397 #ifdef DIAGNOSTIC 398 if (sizeof(struct freelist) > (1 << MINBUCKET)) 399 panic("kmeminit: minbucket too small/struct freelist too big"); 400 #endif 401 402 /* 403 * Compute the number of kmem_map pages, if we have not 404 * done so already. 405 */ 406 kmeminit_nkmempages(); 407 base = vm_map_min(kernel_map); 408 kmem_map = uvm_km_suballoc(kernel_map, &base, &limit, 409 (vsize_t)(nkmempages * PAGE_SIZE), VM_MAP_INTRSAFE, FALSE, 410 &kmem_map_store); 411 kmembase = (char *)base; 412 kmemlimit = (char *)limit; 413 kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map, 414 (vsize_t)(nkmempages * sizeof(struct kmemusage))); 415 416 /* 417 * init all the sub-page pools 418 */ 419 for (i = MINBUCKET; (1 << i) <= MAXALLOCSAVE; i++) { 420 snprintf(mallocplnames[i], sizeof(mallocplnames[i]), 421 "kmem%d", i); 422 pool_init(&mallocpl[i], 1 << i, 1 << i, 0, PR_LIMITFAIL, 423 mallocplnames[i], &pool_allocator_malloc); 424 } 425 426 #ifdef KMEMSTATS 427 for (i = 0; i < MINBUCKET + 16; i++) { 428 if (1 << i >= PAGE_SIZE) 429 bucket[i].kb_elmpercl = 1; 430 else 431 bucket[i].kb_elmpercl = PAGE_SIZE / (1 << i); 432 bucket[i].kb_highwat = 5 * bucket[i].kb_elmpercl; 433 } 434 for (i = 0; i < M_LAST; i++) 435 kmemstats[i].ks_limit = nkmempages * PAGE_SIZE * 6 / 10;; 436 #endif 437 #ifdef MALLOC_DEBUG 438 debug_malloc_init(); 439 #endif 440 } 441 442 /* 443 * Return kernel malloc statistics information. 444 */ 445 int 446 sysctl_malloc(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 447 size_t newlen, struct proc *p) 448 { 449 struct kmembuckets kb; 450 int i, siz; 451 452 if (namelen != 2 && name[0] != KERN_MALLOC_BUCKETS && 453 name[0] != KERN_MALLOC_KMEMNAMES) 454 return (ENOTDIR); /* overloaded */ 455 456 switch (name[0]) { 457 case KERN_MALLOC_BUCKETS: 458 /* Initialize the first time */ 459 if (buckstring_init == 0) { 460 buckstring_init = 1; 461 bzero(buckstring, sizeof(buckstring)); 462 for (siz = 0, i = MINBUCKET; i < MINBUCKET + 16; i++) { 463 snprintf(buckstring + siz, 464 sizeof buckstring - siz, 465 "%d,", (u_int)(1<<i)); 466 siz += strlen(buckstring + siz); 467 } 468 /* Remove trailing comma */ 469 if (siz) 470 buckstring[siz - 1] = '\0'; 471 } 472 return (sysctl_rdstring(oldp, oldlenp, newp, buckstring)); 473 474 case KERN_MALLOC_BUCKET: 475 bcopy(&bucket[BUCKETINDX(name[1])], &kb, sizeof(kb)); 476 return (sysctl_rdstruct(oldp, oldlenp, newp, &kb, sizeof(kb))); 477 case KERN_MALLOC_KMEMSTATS: 478 #ifdef KMEMSTATS 479 if ((name[1] < 0) || (name[1] >= M_LAST)) 480 return (EINVAL); 481 return (sysctl_rdstruct(oldp, oldlenp, newp, 482 &kmemstats[name[1]], sizeof(struct kmemstats))); 483 #else 484 return (EOPNOTSUPP); 485 #endif 486 case KERN_MALLOC_KMEMNAMES: 487 #if defined(KMEMSTATS) || defined(DIAGNOSTIC) || defined(FFS_SOFTUPDATES) 488 if (memall == NULL) { 489 int totlen; 490 491 i = rw_enter(&sysctl_kmemlock, RW_WRITE|RW_INTR); 492 if (i) 493 return (i); 494 495 /* Figure out how large a buffer we need */ 496 for (totlen = 0, i = 0; i < M_LAST; i++) { 497 if (memname[i]) 498 totlen += strlen(memname[i]); 499 totlen++; 500 } 501 memall = malloc(totlen + M_LAST, M_SYSCTL, 502 M_WAITOK|M_ZERO); 503 bzero(memall, totlen + M_LAST); 504 for (siz = 0, i = 0; i < M_LAST; i++) { 505 snprintf(memall + siz, 506 totlen + M_LAST - siz, 507 "%s,", memname[i] ? memname[i] : ""); 508 siz += strlen(memall + siz); 509 } 510 /* Remove trailing comma */ 511 if (siz) 512 memall[siz - 1] = '\0'; 513 514 /* Now, convert all spaces to underscores */ 515 for (i = 0; i < totlen; i++) 516 if (memall[i] == ' ') 517 memall[i] = '_'; 518 rw_exit_write(&sysctl_kmemlock); 519 } 520 return (sysctl_rdstring(oldp, oldlenp, newp, memall)); 521 #else 522 return (EOPNOTSUPP); 523 #endif 524 default: 525 return (EOPNOTSUPP); 526 } 527 /* NOTREACHED */ 528 } 529 530 /* 531 * Round up a size to how much malloc would actually allocate. 532 */ 533 size_t 534 malloc_roundup(size_t sz) 535 { 536 if (sz > MAXALLOCSAVE) 537 return round_page(sz); 538 539 return (1 << BUCKETINDX(sz)); 540 } 541 542 #if defined(DDB) 543 #include <machine/db_machdep.h> 544 #include <ddb/db_interface.h> 545 #include <ddb/db_output.h> 546 547 void 548 malloc_printit(int (*pr)(const char *, ...)) 549 { 550 #ifdef KMEMSTATS 551 struct kmemstats *km; 552 int i; 553 554 (*pr)("%15s %5s %6s %7s %6s %9s %8s %8s\n", 555 "Type", "InUse", "MemUse", "HighUse", "Limit", "Requests", 556 "Type Lim", "Kern Lim"); 557 for (i = 0, km = kmemstats; i < M_LAST; i++, km++) { 558 if (!km->ks_calls || !memname[i]) 559 continue; 560 561 (*pr)("%15s %5ld %6ldK %7ldK %6ldK %9ld %8d %8d\n", 562 memname[i], km->ks_inuse, km->ks_memuse / 1024, 563 km->ks_maxused / 1024, km->ks_limit / 1024, 564 km->ks_calls, km->ks_limblocks, km->ks_mapblocks); 565 } 566 #else 567 (*pr)("No KMEMSTATS compiled in\n"); 568 #endif 569 } 570 #endif /* DDB */ 571