/*
 * Copyright (c) 1997, 1998 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *	notice immediately at the beginning of the file, without modification,
 *	this list of conditions, and the following disclaimer.
 * 2. Absolutely no warranty of function or purpose is made by the author
 *	John S. Dyson.
 *
 * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $
 * $DragonFly: src/sys/vm/vm_zone.c,v 1.28 2008/01/23 17:35:48 nth Exp $
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <sys/spinlock2.h>
#include <sys/mplock2.h>

static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header");

#define	ZONE_ERROR_INVALID 0
#define	ZONE_ERROR_NOTFREE 1
#define	ZONE_ERROR_ALREADYFREE 2

#define ZONE_ROUNDING	32

#define	ZENTRY_FREE	0x12342378

static void *zget(vm_zone_t z);

/*
 * Return an item from the specified zone.  This function is non-blocking for
 * ZONE_INTERRUPT zones.
 */
void *
zalloc(vm_zone_t z)
{
	void *item;

#ifdef INVARIANTS
	if (z == NULL)
		zerror(ZONE_ERROR_INVALID);
#endif
	spin_lock_wr(&z->zlock);
	if (z->zfreecnt > z->zfreemin) {
		item = z->zitems;
#ifdef INVARIANTS
		KASSERT(item != NULL, ("zitems unexpectedly NULL"));
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zitems = ((void **) item)[0];
		z->zfreecnt--;
		z->znalloc++;
		spin_unlock_wr(&z->zlock);
	} else {
		spin_unlock_wr(&z->zlock);
		item = zget(z);
		/*
		 * PANICFAIL allows the caller to assume that the zalloc()
		 * will always succeed.  If it doesn't, we panic here.
		 */
		if (item == NULL && (z->zflags & ZONE_PANICFAIL))
			panic("zalloc(%s) failed", z->zname);
	}
	return item;
}
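
/*
 * Hypothetical usage sketch (not part of the original source): a zone
 * created with ZONE_PANICFAIL lets the caller omit NULL checks, because
 * a failed zalloc() panics instead of returning NULL.  "crit_zone" and
 * "struct crit" are made-up names.
 *
 *	crit_zone = zinit("crit", sizeof(struct crit), 0, ZONE_PANICFAIL, 1);
 *	cp = zalloc(crit_zone);		// never NULL; panics on failure
 */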

/*
 * Free an item to the specified zone.
 */
void
zfree(vm_zone_t z, void *item)
{
	spin_lock_wr(&z->zlock);
	((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
	if (((void **) item)[1] == (void *) ZENTRY_FREE)
		zerror(ZONE_ERROR_ALREADYFREE);
	((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
	z->zitems = item;
	z->zfreecnt++;
	spin_unlock_wr(&z->zlock);
}

/*
 * This file implements a very simple zone allocator.  It is used in lieu
 * of the malloc allocator where that is needed or a zone is more
 * efficient.
 *
 * Note that the initial implementation had page coloring, which yielded
 * absolutely no improvement (it actually degraded performance).
 *
 * Note also that the zones are type-stable.  The only restriction is that
 * the first two longwords of a data structure may be changed between
 * allocations.  Any data that must remain stable between allocations must
 * reside after the first two longwords.
 *
 * zinitna(), zinit() and zbootinit() are the initialization routines;
 * zalloc() and zfree() are the allocation/free routines.
 */
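
/*
 * Illustrative sketch (not part of the original source): how a consumer
 * structure overlaps the free-list bookkeeping described above.  While an
 * item sits on the free list the allocator overwrites its first two
 * pointer-sized words; "struct example_item" is a hypothetical name.
 *
 *	struct example_item {
 *		void	*ei_link;	// clobbered while free: next free item
 *		void	*ei_magic;	// clobbered while free: ZENTRY_FREE
 *					// (INVARIANTS kernels only)
 *		int	 ei_stable;	// preserved between allocations
 *	};
 */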

LIST_HEAD(zlist, vm_zone) zlist = LIST_HEAD_INITIALIZER(zlist);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int zone_kmem_pages, zone_kern_pages, zone_kmem_kvaspace;

/*
 * Create a zone, but don't allocate the zone structure.  If the zone was
 * previously created by the zone boot code, initialize only the remaining
 * parts of the zone.
 *
 * If waits are not allowed during allocation (e.g. in interrupt code),
 * allocate the kernel virtual address space up front and fill in pages
 * only as they are needed.
 *
 * Arguments:
 * z		pointer to zone structure.
 * obj		pointer to VM object (optional).
 * name		name of zone.
 * size		size of zone entries.
 * nentries	number of zone entries allocated (ZONE_INTERRUPT only).
 * flags	ZONE_INTERRUPT -- items can be allocated at interrupt time.
 * zalloc	number of pages allocated when more memory is needed.
 *
 * Note that when using ZONE_INTERRUPT, the total size of the zone is
 * limited by the nentries argument.  If ZONE_INTERRUPT is not set, the
 * amount of memory allocatable is unlimited.
 */
int
zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
	int nentries, int flags, int zalloc)
{
	int totsize;

	/*
	 * Only zones created with zinit() are destroyable.
	 */
	if (z->zflags & ZONE_DESTROYABLE)
		panic("zinitna: can't create destroyable zone");

	/*
	 * NOTE: We can only adjust zsize if we previously did not
	 *	 use zbootinit().
	 */
	if ((z->zflags & ZONE_BOOT) == 0) {
		z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1);
		spin_init(&z->zlock);
		z->zfreecnt = 0;
		z->ztotal = 0;
		z->zmax = 0;
		z->zname = name;
		z->znalloc = 0;
		z->zitems = NULL;

		LIST_INSERT_HEAD(&zlist, z, zlink);
	}

	z->zkmvec = NULL;
	z->zkmcur = z->zkmmax = 0;
	z->zflags |= flags;

	/*
	 * If we cannot wait, allocate KVA space up front, and we will fill
	 * in pages as needed.  This is particularly required when creating
	 * an allocation space for map entries in kernel_map, because we
	 * do not want to go into a recursion deadlock with
	 * vm_map_entry_reserve().
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		totsize = round_page(z->zsize * nentries);
		zone_kmem_kvaspace += totsize;

		z->zkva = kmem_alloc_pageable(&kernel_map, totsize);
		if (z->zkva == 0) {
			LIST_REMOVE(z, zlink);
			return 0;
		}

		z->zpagemax = totsize / PAGE_SIZE;
		if (obj == NULL) {
			z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
		} else {
			z->zobj = obj;
			_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
		}
		z->zallocflag = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT;
		z->zmax += nentries;
	} else {
		z->zallocflag = VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM;
		z->zmax = 0;
	}

	if (z->zsize > PAGE_SIZE)
		z->zfreemin = 1;
	else
		z->zfreemin = PAGE_SIZE / z->zsize;

	z->zpagecount = 0;
	if (zalloc)
		z->zalloc = zalloc;
	else
		z->zalloc = 1;

	/*
	 * Populate the interrupt zone at creation time rather than
	 * on first allocation, as this is a potentially long operation.
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		void *buf;

		buf = zget(z);
		zfree(z, buf);
	}

	return 1;
}
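
/*
 * Illustrative call (hypothetical names, not from the original source):
 * creating an interrupt-safe zone on a caller-supplied zone structure.
 * KVA for all nentries items is reserved up front; zget() wires in
 * physical pages as the zone grows, the first batch right at creation
 * time.
 *
 *	static struct vm_zone isr_zone_store;
 *
 *	if (zinitna(&isr_zone_store, NULL, "ISRITEM",
 *		    sizeof(struct isr_item), 1024, ZONE_INTERRUPT, 1) == 0)
 *		panic("cannot create ISRITEM zone");
 */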

/*
 * The same as zinitna(), except the zone data structure is allocated
 * automatically by kmalloc().  This routine should normally be used,
 * except under certain tricky startup conditions in the VM system --
 * there, zbootinit() and zinitna() can be used.  zinit() is the standard
 * zone initialization call.
 */
vm_zone_t
zinit(char *name, int size, int nentries, int flags, int zalloc)
{
	vm_zone_t z;

	z = (vm_zone_t) kmalloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT);
	if (z == NULL)
		return NULL;

	z->zflags = 0;
	if (zinitna(z, NULL, name, size, nentries,
	            flags & ~ZONE_DESTROYABLE, zalloc) == 0) {
		kfree(z, M_ZONE);
		return NULL;
	}

	if (flags & ZONE_DESTROYABLE)
		z->zflags |= ZONE_DESTROYABLE;

	return z;
}
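
/*
 * Typical usage (illustrative only; "struct mydata" and mydata_zone are
 * hypothetical names):
 *
 *	static vm_zone_t mydata_zone;
 *	struct mydata *md;
 *
 *	mydata_zone = zinit("mydata", sizeof(struct mydata), 0, 0, 4);
 *	if (mydata_zone == NULL)
 *		return (ENOMEM);
 *	md = zalloc(mydata_zone);	// may grow the zone via zget()
 *	...
 *	zfree(mydata_zone, md);		// back onto the zone free list
 */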

/*
 * Initialize a zone before the system is fully up.  This routine should
 * only be called before full VM startup.
 */
void
zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems)
{
	int i;

	z->zname = name;
	z->zsize = size;
	z->zpagemax = 0;
	z->zobj = NULL;
	z->zflags = ZONE_BOOT;
	z->zfreemin = 0;
	z->zallocflag = 0;
	z->zpagecount = 0;
	z->zalloc = 0;
	z->znalloc = 0;
	spin_init(&z->zlock);

	bzero(item, nitems * z->zsize);
	z->zitems = NULL;
	for (i = 0; i < nitems; i++) {
		((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
		((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
		z->zitems = item;
		item = (uint8_t *)item + z->zsize;
	}
	z->zfreecnt = nitems;
	z->zmax = nitems;
	z->ztotal = nitems;

	LIST_INSERT_HEAD(&zlist, z, zlink);
}
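
/*
 * Illustrative boot-time sequence (hypothetical names): seed a zone from
 * a static array before the VM system is up, then finish initializing it
 * with zinitna() once kernel_map allocations are possible.  Because
 * zbootinit() sets ZONE_BOOT, the later zinitna() call leaves zsize and
 * the free list alone, so the name and size may be passed as NULL/0.
 *
 *	#define NBOOTITEMS	64
 *	static struct bootitem	bootitems[NBOOTITEMS];
 *	static struct vm_zone	bootzone;
 *
 *	zbootinit(&bootzone, "BOOTITEM", sizeof(struct bootitem),
 *		  bootitems, NBOOTITEMS);
 *	// ... later, after VM startup:
 *	zinitna(&bootzone, NULL, NULL, 0, 0, 0, 1);
 */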

/*
 * Release all resources owned by a zone created with zinit().
 */
void
zdestroy(vm_zone_t z)
{
	int i;

	if (z == NULL)
		panic("zdestroy: null zone");
	if ((z->zflags & ZONE_DESTROYABLE) == 0)
		panic("zdestroy: undestroyable zone");

	LIST_REMOVE(z, zlink);

	/*
	 * Release the virtual mappings and physical memory and update the
	 * sysctl stats.
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		/*
		 * Free the mapping.
		 */
		kmem_free(&kernel_map, z->zkva, z->zpagemax * PAGE_SIZE);
		atomic_subtract_int(&zone_kmem_kvaspace,
		    z->zpagemax * PAGE_SIZE);
		/*
		 * Free the backing object and physical pages.
		 */
		vm_object_deallocate(z->zobj);
		atomic_subtract_int(&zone_kmem_pages, z->zpagecount);
	} else {
		for (i = 0; i < z->zkmcur; i++) {
			kmem_free(&kernel_map, z->zkmvec[i],
			    z->zalloc * PAGE_SIZE);
			atomic_subtract_int(&zone_kern_pages, z->zalloc);
		}
		if (z->zkmvec != NULL)
			kfree(z->zkmvec, M_ZONE);
	}

	spin_uninit(&z->zlock);
	kfree(z, M_ZONE);
}
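
/*
 * Illustrative lifecycle of a destroyable zone (hypothetical names).
 * ZONE_DESTROYABLE makes zget() record each kernel_map allocation in
 * z->zkmvec so that zdestroy() can unmap it again:
 *
 *	vm_zone_t scratch_zone;
 *
 *	scratch_zone = zinit("scratch", sizeof(struct scratch), 0,
 *			     ZONE_DESTROYABLE, 2);
 *	...
 *	zdestroy(scratch_zone);	// unmaps the KVA, frees the pages and
 *				// the zone header itself
 */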

/*
 * void *zalloc(vm_zone_t zone) --
 *	Returns an item from a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 *
 * void zfree(vm_zone_t zone, void *item) --
 *	Frees an item back to a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 */

/*
 * Internal zone routine.  Not to be called from external (non vm_zone) code.
 */
static void *
zget(vm_zone_t z)
{
	int i;
	vm_page_t m;
	int nitems, nbytes;
	int savezpc;
	void *item;

	if (z == NULL)
		panic("zget: null zone");

	if (z->zflags & ZONE_INTERRUPT) {
		/*
		 * Interrupt zones do not mess with the kernel_map, they
		 * simply populate an existing mapping.
		 */
		get_mplock();
		savezpc = z->zpagecount;
		nbytes = z->zpagecount * PAGE_SIZE;
		nbytes -= nbytes % z->zsize;
		item = (char *) z->zkva + nbytes;
		for (i = 0; ((i < z->zalloc) && (z->zpagecount < z->zpagemax));
		     i++) {
			vm_offset_t zkva;

			m = vm_page_alloc(z->zobj, z->zpagecount,
					  z->zallocflag);
			/* note: z might be modified due to blocking */
			if (m == NULL)
				break;

			/*
			 * Unbusy the page so it can be freed in zdestroy().
			 * Make sure it is not on any queue and thus cannot
			 * be recycled under our feet.
			 */
			KKASSERT(m->queue == PQ_NONE);
			vm_page_flag_clear(m, PG_BUSY);

			zkva = z->zkva + z->zpagecount * PAGE_SIZE;
			pmap_kenter(zkva, VM_PAGE_TO_PHYS(m)); /* YYY */
			bzero((void *)zkva, PAGE_SIZE);
			KKASSERT(savezpc == z->zpagecount);
			++savezpc;
			z->zpagecount++;
			zone_kmem_pages++;
			vmstats.v_wire_count++;
		}
		nitems = ((z->zpagecount * PAGE_SIZE) - nbytes) / z->zsize;
		rel_mplock();
	} else if (z->zflags & ZONE_SPECIAL) {
		/*
		 * The special zone is the one used for vm_map_entry_t's.
		 * We have to avoid an infinite recursion in
		 * vm_map_entry_reserve() by using vm_map_entry_kreserve()
		 * instead.  The map entries are pre-reserved by the kernel
		 * via vm_map_entry_reserve_cpu_init().
		 */
		nbytes = z->zalloc * PAGE_SIZE;

		item = (void *)kmem_alloc3(&kernel_map, nbytes, KM_KRESERVE);

		/* note: z might be modified due to blocking */
		if (item != NULL) {
			zone_kern_pages += z->zalloc;	/* not MP-safe XXX */
			bzero(item, nbytes);
		} else {
			nbytes = 0;
		}
		nitems = nbytes / z->zsize;
	} else {
		/*
		 * Otherwise allocate KVA from the kernel_map.
		 */
		nbytes = z->zalloc * PAGE_SIZE;

		item = (void *)kmem_alloc3(&kernel_map, nbytes, 0);

		/* note: z might be modified due to blocking */
		if (item != NULL) {
			zone_kern_pages += z->zalloc;	/* not MP-safe XXX */
			bzero(item, nbytes);

			if (z->zflags & ZONE_DESTROYABLE) {
				if (z->zkmcur == z->zkmmax) {
					z->zkmmax =
					    z->zkmmax == 0 ? 1 : z->zkmmax * 2;
					z->zkmvec = krealloc(z->zkmvec,
					    z->zkmmax * sizeof(z->zkmvec[0]),
					    M_ZONE, M_WAITOK);
				}
				z->zkmvec[z->zkmcur++] = (vm_offset_t)item;
			}
		} else {
			nbytes = 0;
		}
		nitems = nbytes / z->zsize;
	}

	spin_lock_wr(&z->zlock);
	z->ztotal += nitems;
	/*
	 * Save one for immediate allocation
	 */
	if (nitems != 0) {
		nitems -= 1;
		for (i = 0; i < nitems; i++) {
			((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
			((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
			z->zitems = item;
			item = (uint8_t *)item + z->zsize;
		}
		z->zfreecnt += nitems;
		z->znalloc++;
	} else if (z->zfreecnt > 0) {
		item = z->zitems;
		z->zitems = ((void **) item)[0];
#ifdef INVARIANTS
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zfreecnt--;
		z->znalloc++;
	} else {
		item = NULL;
	}
	spin_unlock_wr(&z->zlock);

	/*
	 * A special zone may have used a kernel-reserved vm_map_entry.  If
	 * so we have to be sure to recover our reserve so we don't run out.
	 * We will panic if we run out.
	 */
	if (z->zflags & ZONE_SPECIAL)
		vm_map_entry_reserve(0);

	return item;
}

static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	vm_zone_t curzone;
	char tmpbuf[128];
	char tmpname[14];

	ksnprintf(tmpbuf, sizeof(tmpbuf),
	    "\nITEM            SIZE     LIMIT    USED    FREE  REQUESTS\n");
	error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf));
	if (error)
		return (error);

	LIST_FOREACH(curzone, &zlist, zlink) {
		int i;
		int len;
		int offset;

		len = strlen(curzone->zname);
		if (len >= (sizeof(tmpname) - 1))
			len = (sizeof(tmpname) - 1);
		for (i = 0; i < sizeof(tmpname) - 1; i++)
			tmpname[i] = ' ';
		tmpname[i] = 0;
		memcpy(tmpname, curzone->zname, len);
		tmpname[len] = ':';
		offset = 0;
		if (curzone == LIST_FIRST(&zlist)) {
			offset = 1;
			tmpbuf[0] = '\n';
		}

		ksnprintf(tmpbuf + offset, sizeof(tmpbuf) - offset,
			"%s %6.6u, %8.8u, %6.6u, %6.6u, %8.8u\n",
			tmpname, curzone->zsize, curzone->zmax,
			(curzone->ztotal - curzone->zfreecnt),
			curzone->zfreecnt, curzone->znalloc);

		len = strlen(tmpbuf);
		if (LIST_NEXT(curzone, zlink) == NULL)
			tmpbuf[len - 1] = 0;

		error = SYSCTL_OUT(req, tmpbuf, len);
		if (error)
			return (error);
	}
	return (0);
}

#if defined(INVARIANTS)
void
zerror(int error)
{
	char *msg;

	switch (error) {
	case ZONE_ERROR_INVALID:
		msg = "zone: invalid zone";
		break;
	case ZONE_ERROR_NOTFREE:
		msg = "zone: entry not free";
		break;
	case ZONE_ERROR_ALREADYFREE:
		msg = "zone: freeing free entry";
		break;
	default:
		msg = "zone: invalid error";
		break;
	}
	panic(msg);
}
#endif

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
	NULL, 0, sysctl_vm_zone, "A", "Zone Info");
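
/*
 * The handler above backs the read-only "vm.zone" string sysctl, so the
 * zone table can be inspected from userland, e.g.:
 *
 *	$ sysctl vm.zone
 *	ITEM            SIZE     LIMIT    USED    FREE  REQUESTS
 *	...
 *
 * (column header taken from the format string above; the actual rows
 * vary per system)
 */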

SYSCTL_INT(_vm, OID_AUTO, zone_kmem_pages, CTLFLAG_RD, &zone_kmem_pages, 0,
	"Number of interrupt-safe pages allocated by the zone allocator");
SYSCTL_INT(_vm, OID_AUTO, zone_kmem_kvaspace, CTLFLAG_RD,
	&zone_kmem_kvaspace, 0, "KVA space allocated by the zone allocator");
SYSCTL_INT(_vm, OID_AUTO, zone_kern_pages, CTLFLAG_RD, &zone_kern_pages, 0,
	"Number of non-interrupt-safe pages allocated by the zone allocator");