xref: /openbsd-src/sys/kern/kern_malloc.c (revision 850e275390052b330d93020bf619a739a3c277ac)
1 /*	$OpenBSD: kern_malloc.c,v 1.75 2008/09/29 12:34:18 art Exp $	*/
2 
3 /*
4  * Copyright (c) 2008 Michael Shalayeff
5  * Copyright (c) 1987, 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)kern_malloc.c	8.3 (Berkeley) 1/4/94
33  */
34 
35 #include <sys/param.h>
36 #include <sys/proc.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/systm.h>
40 #include <sys/sysctl.h>
41 #include <sys/time.h>
42 #include <sys/pool.h>
43 #include <sys/rwlock.h>
44 
45 #include <uvm/uvm_extern.h>
46 
/*
 * Backing storage for the kernel malloc submap, and the pointer through
 * which the rest of this file reaches it.  The pointer stays NULL until
 * kmeminit() stores the result of uvm_km_suballoc() in it.
 */
static struct vm_map kmem_map_store;
struct vm_map *kmem_map = NULL;
49 
50 #ifdef NKMEMCLUSTERS
51 #error NKMEMCLUSTERS is obsolete; remove it from your kernel config file and use NKMEMPAGES instead or let the kernel auto-size
52 #endif
53 
/*
 * Default number of pages in kmem_map.  We attempt to calculate this
 * at run-time, but allow it to be either patched or set in the kernel
 * config file.
 *
 * A value of 0 means "not sized yet": kmeminit_nkmempages() only
 * computes a value while this is still 0.
 */
#ifndef NKMEMPAGES
#define	NKMEMPAGES	0
#endif
u_int	nkmempages = NKMEMPAGES;

/*
 * Defaults for lower- and upper-bounds for the kmem_map page count.
 * Can be overridden by kernel config options.
 *
 * Both variables start out as 0 rather than as the macro values;
 * kmeminit_nkmempages() replaces a 0 with NKMEMPAGES_MIN or
 * NKMEMPAGES_MAX at run-time and leaves any non-zero value alone.
 */
#ifndef	NKMEMPAGES_MIN
#define	NKMEMPAGES_MIN	NKMEMPAGES_MIN_DEFAULT
#endif
u_int	nkmempages_min = 0;

#ifndef NKMEMPAGES_MAX
#define	NKMEMPAGES_MAX	NKMEMPAGES_MAX_DEFAULT
#endif
u_int	nkmempages_max = 0;
77 
/*
 * One pool per power-of-two bucket, indexed by bucket number.
 * kmeminit() initializes the entries from MINBUCKET up to the bucket
 * whose size is MAXALLOCSAVE and names each one "kmem<index>".
 */
struct pool mallocpl[MINBUCKET + 16];
char mallocplnames[MINBUCKET + 16][8];	/* wchan for pool */
/* NOTE(review): nothing in the visible part of this file uses mallocplwarn. */
char mallocplwarn[MINBUCKET + 16][32];  /* warning message for hard limit */

/* Per-bucket and per-type statistics, reported through sysctl_malloc(). */
struct kmembuckets bucket[MINBUCKET + 16];
struct kmemstats kmemstats[M_LAST];
/* Per-page usage records for kmem_map, allocated by kmeminit(). */
struct kmemusage *kmemusage;
/* Bounds of kmem_map as set by kmeminit(); free() tests addresses against them. */
char *kmembase, *kmemlimit;
/* Comma-separated list of bucket sizes, built on first use by sysctl_malloc(). */
char buckstring[16 * sizeof("123456,")];
int buckstring_init = 0;
#if defined(KMEMSTATS) || defined(DIAGNOSTIC) || defined(FFS_SOFTUPDATES)
/* Names of the malloc types, indexed by type number. */
char *memname[] = INITKMEMNAMES;
/* Comma-separated list of all type names, built on first use by sysctl_malloc(). */
char *memall = NULL;
/* Taken by sysctl_malloc() while it builds memall. */
struct rwlock sysctl_kmemlock = RWLOCK_INITIALIZER("sysctlklk");
#endif
93 
#ifdef DIAGNOSTIC
/*
 * WEIRD_ADDR is used as known text to copy into free objects so
 * that modifications after frees can be detected.
 *
 * NOTE(review): nothing in the visible part of this file references
 * WEIRD_ADDR or MAX_COPY -- confirm whether they are still needed.
 */
#ifdef DEADBEEF0
#define WEIRD_ADDR	((unsigned) DEADBEEF0)
#else
#define WEIRD_ADDR	((unsigned) 0xdeadbeef)
#endif
#define MAX_COPY	32

/*
 * Normally the freelist structure is used only to hold the list pointer
 * for free objects.  However, when running with diagnostics, the first
 * 8 bytes of the structure are unused except for diagnostic information,
 * and the free list pointer is at offset 8 in the structure.  Since the
 * first 8 bytes are the portion of the structure most often modified, this
 * helps to detect memory reuse problems and avoid free list corruption.
 *
 * In the visible part of this file the structure is only used by the
 * size check at the top of kmeminit().
 */
struct freelist {
	int32_t	spare0;
	int16_t	type;
	int16_t	spare1;
	caddr_t	next;
};
#else /* !DIAGNOSTIC */
struct freelist {
	caddr_t	next;
};
#endif /* DIAGNOSTIC */
125 
#ifndef SMALL_KERNEL
/*
 * Rate limit for the "allocation too large" message in malloc():
 * the interval handed to ratecheck() and the timestamp it maintains.
 */
struct timeval malloc_errintvl = { 5, 0 };
struct timeval malloc_lasterr;
#endif
130 
/*
 * Page-level allocator handed to pool_init() for every malloc pool in
 * kmeminit().  Its two callbacks are malloc_page_alloc() and
 * malloc_page_free() below.
 */
void	*malloc_page_alloc(struct pool *, int);
void	malloc_page_free(struct pool *, void *);
struct pool_allocator pool_allocator_malloc = {
	malloc_page_alloc, malloc_page_free, 0,
};
136 
137 void *
138 malloc_page_alloc(struct pool *pp, int flags)
139 {
140 	void *v = uvm_km_getpage(flags & M_NOWAIT? 0 : 1);
141 	struct vm_page *pg;
142 	paddr_t pa;
143 
144 	if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa))
145 		panic("malloc_page_alloc: pmap_extract failed");
146 
147 	pg = PHYS_TO_VM_PAGE(pa);
148 	if (pg == NULL)
149 		panic("malloc_page_alloc: no page");
150 	pg->wire_count = BUCKETINDX(pp->pr_size);
151 
152 	return v;
153 }
154 
155 void
156 malloc_page_free(struct pool *pp, void *v)
157 {
158 	struct vm_page *pg;
159 	paddr_t pa;
160 
161 	if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa))
162 		panic("malloc_page_free: pmap_extract failed");
163 
164 	pg = PHYS_TO_VM_PAGE(pa);
165 	if (pg == NULL)
166 		panic("malloc_page_free: no page");
167 	pg->wire_count = 1;
168 	uvm_km_putpage(v);
169 }
170 
/*
 * Allocate a block of memory.
 *
 * size:  number of bytes requested.
 * type:  malloc type; indexes kmemstats[] and memname[] when KMEMSTATS
 *        is defined.
 * flags: M_NOWAIT, M_CANFAIL and M_ZERO are examined here.
 *
 * Requests larger than MAXALLOCSAVE are rounded up to whole pages and
 * taken from kmem_map; everything else comes from the pool of the
 * matching power-of-two bucket.
 *
 * Returns the address of the block, or NULL on failure.  A failure
 * panics instead when neither M_NOWAIT nor M_CANFAIL is set (for an
 * oversized request only M_CANFAIL avoids the panic).
 */
void *
malloc(unsigned long size, int type, int flags)
{
	struct kmembuckets *kbp;
	struct kmemusage *kup;
	vsize_t indx, allocsize;
	int s;
	void *va;
#ifdef KMEMSTATS
	struct kmemstats *ksp = &kmemstats[type];

	if (((unsigned long)type) >= M_LAST)
		panic("malloc - bogus type");
#endif

#ifdef MALLOC_DEBUG
	/* The debug allocator may take over the request entirely. */
	if (debug_malloc(size, type, flags, &va)) {
		if ((flags & M_ZERO) && va != NULL)
			memset(va, 0, size);
		return (va);
	}
#endif

	/*
	 * Refuse requests of more than 65535 pages.
	 * NOTE(review): presumably the limit of what ku_pagecnt can
	 * record -- confirm against struct kmemusage.
	 */
	if (size > 65535 * PAGE_SIZE) {
		if (flags & M_CANFAIL) {
#ifndef SMALL_KERNEL
			/* Rate-limited so a looping caller cannot flood the log. */
			if (ratecheck(&malloc_lasterr, &malloc_errintvl))
				printf("malloc(): allocation too large, "
				    "type = %d, size = %lu\n", type, size);
#endif
			return (NULL);
		} else
			panic("malloc: allocation too large");
	}

	indx = BUCKETINDX(size);
	kbp = &bucket[indx];
	s = splvm();
#ifdef KMEMSTATS
	/*
	 * Enforce the per-type limit: fail at once for M_NOWAIT, otherwise
	 * sleep on ksp until free() brings usage back under the limit and
	 * calls wakeup().  ks_limblocks counts these waits, capped at 65535.
	 */
	while (ksp->ks_memuse >= ksp->ks_limit) {
		if (flags & M_NOWAIT) {
			splx(s);
			return (NULL);
		}
		if (ksp->ks_limblocks < 65535)
			ksp->ks_limblocks++;
		tsleep(ksp, PSWP+2, memname[type], 0);
	}
#endif
	if (size > MAXALLOCSAVE) {
		/* Large request: whole pages straight from kmem_map. */
		allocsize = round_page(size);
		va = (void *) uvm_km_kmemalloc(kmem_map, NULL, allocsize,
		    ((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) |
		    ((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0));
		if (va == NULL) {
			/*
			 * uvm_km_kmemalloc() can return NULL, even if it can
			 * wait, if there is no map space available, because
			 * it can't fix that problem.  Neither can we,
			 * right now.  (We should release pages which
			 * are completely free and which are in buckets
			 * with too many free elements.)
			 */
			if ((flags & (M_NOWAIT|M_CANFAIL)) == 0)
				panic("malloc: out of space in kmem_map");
			splx(s);
			return (NULL);
		}
#ifdef KMEMSTATS
		kbp->kb_total++;
		kbp->kb_calls++;
#endif
		/* Record bucket and page count so free() can size the release. */
		kup = btokup(va);
		kup->ku_indx = indx;
		kup->ku_pagecnt = atop(allocsize);
	} else {
		/* Small request: one item from the bucket's pool. */
		allocsize = mallocpl[indx].pr_size;
		va = pool_get(&mallocpl[indx], PR_LIMITFAIL |
		    (flags & M_NOWAIT ? 0 : PR_WAITOK));
		if (!va && (flags & (M_NOWAIT|M_CANFAIL)) == 0)
			panic("malloc: out of space in kmem pool");
	}

#ifdef KMEMSTATS
	/* Charge the allocated size (not the requested size) to the type. */
	if (va) {
		ksp->ks_memuse += allocsize;
		if (ksp->ks_memuse > ksp->ks_maxused)
			ksp->ks_maxused = ksp->ks_memuse;
		ksp->ks_size |= 1 << indx;
		ksp->ks_inuse++;
		ksp->ks_calls++;
	}
#endif
	splx(s);

	/* Only the requested bytes are cleared, not the rounded-up block. */
	if ((flags & M_ZERO) && va != NULL)
		memset(va, 0, size);

	return (va);
}
274 
/*
 * Free a block of memory allocated by malloc.
 *
 * addr: address returned by malloc().
 * type: malloc type the block was charged to; indexes kmemstats[] when
 *       KMEMSTATS is defined.
 *
 * Addresses inside [kmembase, kmemlimit) are treated as large
 * allocations and returned to kmem_map; all others are handed back to
 * the pool named by the bucket tag stored on their physical page.
 */
void
free(void *addr, int type)
{
	struct kmembuckets *kbp;
	struct kmemusage *kup;
	struct vm_page *pg;
	paddr_t pa;
	long size;
	int s;
#ifdef KMEMSTATS
	struct kmemstats *ksp = &kmemstats[type];
#endif

#ifdef MALLOC_DEBUG
	/* The debug allocator handles blocks it handed out itself. */
	if (debug_free(addr, type))
		return;
#endif

	s = splvm();
	if (addr >= (void *)kmembase && addr < (void *)kmemlimit) {
		/* Large allocation: size comes from the page count malloc() saved. */
		kup = btokup(addr);
		kbp = &bucket[kup->ku_indx];
		size = ptoa(kup->ku_pagecnt);
#ifdef DIAGNOSTIC
		if ((vaddr_t)addr != round_page((vaddr_t)addr))
			panic("free: unaligned addr %p, size %ld, type %s",
			    addr, size, memname[type]);
#endif /* DIAGNOSTIC */
		uvm_km_free(kmem_map, (vaddr_t)addr, size);
#ifdef KMEMSTATS
		kup->ku_indx = 0;
		kup->ku_pagecnt = 0;
		kbp->kb_total--;
#endif
	} else {
		/*
		 * Pool allocation: malloc_page_alloc() stored the bucket
		 * index in the wire_count of the backing page.
		 */
		if (!pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa))
			panic("free: pmap_extract failed");
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg == NULL)
			panic("free: no page");
#ifdef DIAGNOSTIC
		if (pg->pg_flags & PQ_FREE)
			panic("free: page %p is free", pg);
		/* The tag must name one of the buckets kmeminit() set up. */
		if (pg->wire_count < MINBUCKET ||
		    (1 << pg->wire_count) > MAXALLOCSAVE)
			panic("free: invalid page bucket %d", pg->wire_count);
#endif
		size = mallocpl[pg->wire_count].pr_size;
		pool_put(&mallocpl[pg->wire_count], addr);
	}

#ifdef KMEMSTATS
	ksp->ks_inuse--;
	ksp->ks_memuse -= size;
	/* Wake sleepers in malloc() only when this free crosses the limit. */
	if (ksp->ks_memuse + size >= ksp->ks_limit &&
	    ksp->ks_memuse < ksp->ks_limit)
		wakeup(ksp);		/* unnecessary for pool, whatever */
#endif

	splx(s);
}
339 
340 /*
341  * Compute the number of pages that kmem_map will map, that is,
342  * the size of the kernel malloc arena.
343  */
344 void
345 kmeminit_nkmempages(void)
346 {
347 	u_int npages;
348 
349 	if (nkmempages != 0) {
350 		/*
351 		 * It's already been set (by us being here before, or
352 		 * by patching or kernel config options), bail out now.
353 		 */
354 		return;
355 	}
356 
357 	/*
358 	 * We can't initialize these variables at compilation time, since
359 	 * the page size may not be known (on sparc GENERIC kernels, for
360 	 * example). But we still want the MD code to be able to provide
361 	 * better values.
362 	 */
363 	if (nkmempages_min == 0)
364 		nkmempages_min = NKMEMPAGES_MIN;
365 	if (nkmempages_max == 0)
366 		nkmempages_max = NKMEMPAGES_MAX;
367 
368 	/*
369 	 * We use the following (simple) formula:
370 	 *
371 	 *	- Starting point is physical memory / 4.
372 	 *
373 	 *	- Clamp it down to nkmempages_max.
374 	 *
375 	 *	- Round it up to nkmempages_min.
376 	 */
377 	npages = physmem / 4;
378 
379 	if (npages > nkmempages_max)
380 		npages = nkmempages_max;
381 
382 	if (npages < nkmempages_min)
383 		npages = nkmempages_min;
384 
385 	nkmempages = npages;
386 }
387 
388 /*
389  * Initialize the kernel memory allocator
390  */
391 void
392 kmeminit(void)
393 {
394 	vaddr_t base, limit;
395 	int i;
396 
397 #ifdef DIAGNOSTIC
398 	if (sizeof(struct freelist) > (1 << MINBUCKET))
399 		panic("kmeminit: minbucket too small/struct freelist too big");
400 #endif
401 
402 	/*
403 	 * Compute the number of kmem_map pages, if we have not
404 	 * done so already.
405 	 */
406 	kmeminit_nkmempages();
407 	base = vm_map_min(kernel_map);
408 	kmem_map = uvm_km_suballoc(kernel_map, &base, &limit,
409 	    (vsize_t)(nkmempages * PAGE_SIZE), VM_MAP_INTRSAFE, FALSE,
410 	    &kmem_map_store);
411 	kmembase = (char *)base;
412 	kmemlimit = (char *)limit;
413 	kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map,
414 		(vsize_t)(nkmempages * sizeof(struct kmemusage)));
415 
416 	/*
417 	 * init all the sub-page pools
418 	 */
419 	for (i = MINBUCKET; (1 << i) <= MAXALLOCSAVE; i++) {
420 		snprintf(mallocplnames[i], sizeof(mallocplnames[i]),
421 		    "kmem%d", i);
422 		pool_init(&mallocpl[i], 1 << i, 1 << i, 0, PR_LIMITFAIL,
423 		    mallocplnames[i], &pool_allocator_malloc);
424 	}
425 
426 #ifdef KMEMSTATS
427 	for (i = 0; i < MINBUCKET + 16; i++) {
428 		if (1 << i >= PAGE_SIZE)
429 			bucket[i].kb_elmpercl = 1;
430 		else
431 			bucket[i].kb_elmpercl = PAGE_SIZE / (1 << i);
432 		bucket[i].kb_highwat = 5 * bucket[i].kb_elmpercl;
433 	}
434 	for (i = 0; i < M_LAST; i++)
435 		kmemstats[i].ks_limit = nkmempages * PAGE_SIZE * 6 / 10;;
436 #endif
437 #ifdef MALLOC_DEBUG
438 	debug_malloc_init();
439 #endif
440 }
441 
442 /*
443  * Return kernel malloc statistics information.
444  */
445 int
446 sysctl_malloc(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
447     size_t newlen, struct proc *p)
448 {
449 	struct kmembuckets kb;
450 	int i, siz;
451 
452 	if (namelen != 2 && name[0] != KERN_MALLOC_BUCKETS &&
453 	    name[0] != KERN_MALLOC_KMEMNAMES)
454 		return (ENOTDIR);		/* overloaded */
455 
456 	switch (name[0]) {
457 	case KERN_MALLOC_BUCKETS:
458 		/* Initialize the first time */
459 		if (buckstring_init == 0) {
460 			buckstring_init = 1;
461 			bzero(buckstring, sizeof(buckstring));
462 			for (siz = 0, i = MINBUCKET; i < MINBUCKET + 16; i++) {
463 				snprintf(buckstring + siz,
464 				    sizeof buckstring - siz,
465 				    "%d,", (u_int)(1<<i));
466 				siz += strlen(buckstring + siz);
467 			}
468 			/* Remove trailing comma */
469 			if (siz)
470 				buckstring[siz - 1] = '\0';
471 		}
472 		return (sysctl_rdstring(oldp, oldlenp, newp, buckstring));
473 
474 	case KERN_MALLOC_BUCKET:
475 		bcopy(&bucket[BUCKETINDX(name[1])], &kb, sizeof(kb));
476 		return (sysctl_rdstruct(oldp, oldlenp, newp, &kb, sizeof(kb)));
477 	case KERN_MALLOC_KMEMSTATS:
478 #ifdef KMEMSTATS
479 		if ((name[1] < 0) || (name[1] >= M_LAST))
480 			return (EINVAL);
481 		return (sysctl_rdstruct(oldp, oldlenp, newp,
482 		    &kmemstats[name[1]], sizeof(struct kmemstats)));
483 #else
484 		return (EOPNOTSUPP);
485 #endif
486 	case KERN_MALLOC_KMEMNAMES:
487 #if defined(KMEMSTATS) || defined(DIAGNOSTIC) || defined(FFS_SOFTUPDATES)
488 		if (memall == NULL) {
489 			int totlen;
490 
491 			i = rw_enter(&sysctl_kmemlock, RW_WRITE|RW_INTR);
492 			if (i)
493 				return (i);
494 
495 			/* Figure out how large a buffer we need */
496 			for (totlen = 0, i = 0; i < M_LAST; i++) {
497 				if (memname[i])
498 					totlen += strlen(memname[i]);
499 				totlen++;
500 			}
501 			memall = malloc(totlen + M_LAST, M_SYSCTL,
502 			    M_WAITOK|M_ZERO);
503 			bzero(memall, totlen + M_LAST);
504 			for (siz = 0, i = 0; i < M_LAST; i++) {
505 				snprintf(memall + siz,
506 				    totlen + M_LAST - siz,
507 				    "%s,", memname[i] ? memname[i] : "");
508 				siz += strlen(memall + siz);
509 			}
510 			/* Remove trailing comma */
511 			if (siz)
512 				memall[siz - 1] = '\0';
513 
514 			/* Now, convert all spaces to underscores */
515 			for (i = 0; i < totlen; i++)
516 				if (memall[i] == ' ')
517 					memall[i] = '_';
518 			rw_exit_write(&sysctl_kmemlock);
519 		}
520 		return (sysctl_rdstring(oldp, oldlenp, newp, memall));
521 #else
522 		return (EOPNOTSUPP);
523 #endif
524 	default:
525 		return (EOPNOTSUPP);
526 	}
527 	/* NOTREACHED */
528 }
529 
530 /*
531  * Round up a size to how much malloc would actually allocate.
532  */
533 size_t
534 malloc_roundup(size_t sz)
535 {
536 	if (sz > MAXALLOCSAVE)
537 		return round_page(sz);
538 
539 	return (1 << BUCKETINDX(sz));
540 }
541 
542 #if defined(DDB)
543 #include <machine/db_machdep.h>
544 #include <ddb/db_interface.h>
545 #include <ddb/db_output.h>
546 
/*
 * Print one line of statistics per malloc type through the
 * printf-style function pr.  Types that have never been allocated from,
 * or that have no name, are left out.  Without KMEMSTATS only a notice
 * is printed.
 */
void
malloc_printit(int (*pr)(const char *, ...))
{
#ifdef KMEMSTATS
	int type;

	/* Column headings, aligned with the data rows below. */
	(*pr)("%15s %5s  %6s  %7s  %6s %9s %8s %8s\n",
	    "Type", "InUse", "MemUse", "HighUse", "Limit", "Requests",
	    "Type Lim", "Kern Lim");

	for (type = 0; type < M_LAST; type++) {
		struct kmemstats *ks = &kmemstats[type];

		if (ks->ks_calls == 0 || memname[type] == NULL)
			continue;

		/* Byte counts are shown in kilobytes. */
		(*pr)("%15s %5ld %6ldK %7ldK %6ldK %9ld %8d %8d\n",
		    memname[type], ks->ks_inuse, ks->ks_memuse / 1024,
		    ks->ks_maxused / 1024, ks->ks_limit / 1024,
		    ks->ks_calls, ks->ks_limblocks, ks->ks_mapblocks);
	}
#else
	(*pr)("No KMEMSTATS compiled in\n");
#endif
}
570 #endif /* DDB */
571