/*
 * Copyright (c) 1997, 1998 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 *
 * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $
 * $DragonFly: src/sys/vm/vm_zone.c,v 1.7 2003/07/29 21:24:33 hmp Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header");

#define	ZONE_ERROR_INVALID	0
#define	ZONE_ERROR_NOTFREE	1
#define	ZONE_ERROR_ALREADYFREE	2

#define	ZONE_ROUNDING		32

#define	ZENTRY_FREE		0x12342378

static void *zget(vm_zone_t z);

/*
 * Return an item from the specified zone.  This function is interrupt/MP
 * thread safe, but might block.
 */
void *
zalloc(vm_zone_t z)
{
	void *item;

#ifdef INVARIANTS
	if (z == NULL)
		zerror(ZONE_ERROR_INVALID);
#endif
	lwkt_gettoken(&z->zlock);
	if (z->zfreecnt <= z->zfreemin) {
		item = zget(z);
		/*
		 * PANICFAIL allows the caller to assume that the zalloc()
		 * will always succeed.  If it doesn't, we panic here.
		 */
		if (item == NULL && (z->zflags & ZONE_PANICFAIL))
			panic("zalloc(%s) failed", z->zname);
	} else {
		item = z->zitems;
		z->zitems = ((void **) item)[0];
#ifdef INVARIANTS
		KASSERT(item != NULL, ("zitems unexpectedly NULL"));
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zfreecnt--;
		z->znalloc++;
	}
	lwkt_reltoken(&z->zlock);
	return item;
}

/*
 * Free an item to the specified zone.  This function is interrupt/MP
 * thread safe, but might block.
 */
void
zfree(vm_zone_t z, void *item)
{
	lwkt_gettoken(&z->zlock);
	((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
	if (((void **) item)[1] == (void *) ZENTRY_FREE)
		zerror(ZONE_ERROR_ALREADYFREE);
	((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
	z->zitems = item;
	z->zfreecnt++;
	lwkt_reltoken(&z->zlock);
}

/*
 * This file implements a very simple zone allocator.  It is used in lieu
 * of the malloc allocator where a fixed-size, type-stable allocator is
 * needed or more efficient.
 *
 * Note that the initial implementation included cache coloring; it yielded
 * no improvement (performance actually degraded), so it was removed.
 *
 * Note also that the zones are type stable.  The only restriction is that
 * the first two longwords of an item may be overwritten while the item is
 * on the free list (they hold the free-list link and, under INVARIANTS, a
 * magic number).  Any data that must remain stable across a free/alloc
 * cycle must reside after the first two longwords.
 *
 * zinitna, zinit and zbootinit are the initialization routines.
 * zalloc and zfree are the allocation/free routines; as noted in their
 * headers above, they are interrupt/MP thread safe but may block.
 */
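
/*
 * Illustrative sketch, not part of the original file: the typical calling
 * pattern for the zalloc()/zfree() pair documented above, assuming a zone
 * has already been created with zinit() or zbootinit()/zinitna().
 * "struct mythingy" and "mythingy_zone" are hypothetical names used only
 * for this example.
 */
#if 0
struct mythingy {
	struct mythingy	*mt_next;	/* first longword: clobbered while free */
	void		*mt_spare;	/* second longword: clobbered while free */
	int		 mt_payload;	/* stable, type-stable storage */
};

static vm_zone_t mythingy_zone;		/* set up elsewhere via zinit() */

static struct mythingy *
mythingy_alloc(int payload)
{
	struct mythingy *mt;

	mt = zalloc(mythingy_zone);	/* may block; may return NULL */
	if (mt != NULL)
		mt->mt_payload = payload;
	return mt;
}

static void
mythingy_free(struct mythingy *mt)
{
	zfree(mythingy_zone, mt);	/* item returns to the zone free list */
}
#endif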

static struct vm_zone *zlist;
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int zone_kmem_pages, zone_kern_pages, zone_kmem_kvaspace;

/*
 * Create a zone, but don't allocate the zone structure.  If the zone was
 * not already set up by the zone boot code (zbootinit), initialize the
 * basic zone fields here.
 *
 * If waits are not allowed during allocation (e.g. during interrupt
 * code), reserve the kernel virtual address space up front and allocate
 * pages only as they are needed.
 *
 * Arguments:
 *	z		pointer to zone structure.
 *	obj		pointer to VM object (opt).
 *	name		name of zone.
 *	size		size of zone entries.
 *	nentries	number of zone entries allocated (ZONE_INTERRUPT only).
 *	flags		ZONE_INTERRUPT -- items can be allocated at interrupt time.
 *	zalloc		number of pages allocated when memory is needed.
 *
 * Note that when using ZONE_INTERRUPT, the size of the zone is limited
 * by the nentries argument.  If ZONE_INTERRUPT is not set, the amount of
 * memory the zone may allocate is unlimited.
 */
int
zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
	int nentries, int flags, int zalloc)
{
	int totsize;

	if ((z->zflags & ZONE_BOOT) == 0) {
		z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1);
		lwkt_inittoken(&z->zlock);
		z->zfreecnt = 0;
		z->ztotal = 0;
		z->zmax = 0;
		z->zname = name;
		z->znalloc = 0;
		z->zitems = NULL;

		z->znext = zlist;
		zlist = z;
	}

	z->zflags |= flags;

	/*
	 * If we cannot wait, allocate KVA space up front, and we will fill
	 * in pages as needed.
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		totsize = round_page(z->zsize * nentries);
		zone_kmem_kvaspace += totsize;

		z->zkva = kmem_alloc_pageable(kernel_map, totsize);
		if (z->zkva == 0) {
			zlist = z->znext;
			return 0;
		}

		z->zpagemax = totsize / PAGE_SIZE;
		if (obj == NULL) {
			z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
		} else {
			z->zobj = obj;
			_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
		}
		z->zallocflag = VM_ALLOC_INTERRUPT;
		z->zmax += nentries;
	} else {
		z->zallocflag = VM_ALLOC_SYSTEM;
		z->zmax = 0;
	}

	if (z->zsize > PAGE_SIZE)
		z->zfreemin = 1;
	else
		z->zfreemin = PAGE_SIZE / z->zsize;

	z->zpagecount = 0;
	if (zalloc)
		z->zalloc = zalloc;
	else
		z->zalloc = 1;

	return 1;
}

/*
 * Same as zinitna, except the zone data structure is allocated
 * automatically by malloc.  This is the routine that should normally be
 * used; zbootinit and zinitna are only needed for certain tricky startup
 * conditions in the VM system.  zinit is the standard zone initialization
 * call.
 */
vm_zone_t
zinit(char *name, int size, int nentries, int flags, int zalloc)
{
	vm_zone_t z;

	z = (vm_zone_t) malloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT);
	if (z == NULL)
		return NULL;

	z->zflags = 0;
	if (zinitna(z, NULL, name, size, nentries, flags, zalloc) == 0) {
		free(z, M_ZONE);
		return NULL;
	}

	return z;
}
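
/*
 * Illustrative sketch, not part of the original file: two hypothetical
 * zinit() calls showing the two zone configurations described in the
 * zinitna() comment above.  The zone names, item structures and tuning
 * numbers are invented for this example, and "struct isritem" /
 * "struct plainitem" are assumed to be defined elsewhere.
 */
#if 0
static vm_zone_t isritem_zone;		/* hypothetical */
static vm_zone_t plainitem_zone;	/* hypothetical */

static void
example_zone_setup(void)
{
	/*
	 * Interrupt-safe zone: KVA for up to 256 entries is reserved up
	 * front (round_page(zsize * nentries)), the zone is limited to
	 * those 256 entries, and backing pages are wired in one page at
	 * a time (zalloc == 1) as the zone grows.
	 */
	isritem_zone = zinit("ISRITEM", sizeof(struct isritem), 256,
	    ZONE_INTERRUPT, 1);

	/*
	 * Ordinary zone: no entry limit and no preallocated KVA; the zone
	 * grows two pages at a time out of kernel_map (or kmem_map when
	 * the kernel map is busy).
	 */
	plainitem_zone = zinit("PLAINITEM", sizeof(struct plainitem), 0,
	    0, 2);
}
#endif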

/*
 * Initialize a zone before the system is fully up.  This routine should
 * only be called before full VM startup.
 */
void
zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems)
{
	int i;

	z->zname = name;
	z->zsize = size;
	z->zpagemax = 0;
	z->zobj = NULL;
	z->zflags = ZONE_BOOT;
	z->zfreemin = 0;
	z->zallocflag = 0;
	z->zpagecount = 0;
	z->zalloc = 0;
	z->znalloc = 0;
	lwkt_inittoken(&z->zlock);

	bzero(item, nitems * z->zsize);
	z->zitems = NULL;
	for (i = 0; i < nitems; i++) {
		((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
		((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
		z->zitems = item;
		item = (char *) item + z->zsize;
	}
	z->zfreecnt = nitems;
	z->zmax = nitems;
	z->ztotal = nitems;

	if (zlist == 0) {
		zlist = z;
	} else {
		z->znext = zlist;
		zlist = z;
	}
}
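
/*
 * Illustrative sketch, not part of the original file: the bootstrap
 * pattern zbootinit() exists for.  Very early in VM startup a zone is
 * seeded from statically allocated storage; later, once the VM system is
 * up, zinitna() is called on the same zone (which still carries
 * ZONE_BOOT, so its basic fields are not reinitialized) to finish the
 * setup.  All names and counts here are hypothetical, and
 * "struct bootthing" is assumed to be defined elsewhere.
 */
#if 0
#define NBOOTTHINGS	64

static struct vm_zone	bootthing_zone_store;	/* static zone header */
static struct bootthing	bootthing_init[NBOOTTHINGS]; /* static first items */
static vm_zone_t	bootthing_zone;

static void
bootthing_zone_early_init(void)
{
	/* Safe before full VM startup: no malloc, no kernel_map activity. */
	bootthing_zone = &bootthing_zone_store;
	zbootinit(bootthing_zone, "BOOTTHING", sizeof(struct bootthing),
	    bootthing_init, NBOOTTHINGS);
}

static void
bootthing_zone_late_init(void)
{
	/* After VM startup: allow the zone to grow two pages at a time. */
	zinitna(bootthing_zone, NULL, "BOOTTHING", sizeof(struct bootthing),
	    0, 0, 2);
}
#endif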

/*
 * void *zalloc(vm_zone_t zone) --
 *	Returns an item from a specified zone.  Interrupt/MP thread safe,
 *	but may block.
 *
 * void zfree(vm_zone_t zone, void *item) --
 *	Frees an item back to a specified zone.  Interrupt/MP thread safe,
 *	but may block.
 */

/*
 * Internal zone routine.  Not to be called from external (non-vm_zone) code.
 */
static void *
zget(vm_zone_t z)
{
	int i;
	vm_page_t m;
	int nitems, nbytes;
	void *item;

	if (z == NULL)
		panic("zget: null zone");

	if (z->zflags & ZONE_INTERRUPT) {
		nbytes = z->zpagecount * PAGE_SIZE;
		nbytes -= nbytes % z->zsize;
		item = (char *) z->zkva + nbytes;
		for (i = 0; ((i < z->zalloc) && (z->zpagecount < z->zpagemax));
		     i++) {
			vm_offset_t zkva;

			m = vm_page_alloc(z->zobj, z->zpagecount,
					  z->zallocflag);
			if (m == NULL)
				break;
			lwkt_regettoken(&z->zlock);

			zkva = z->zkva + z->zpagecount * PAGE_SIZE;
			pmap_kenter(zkva, VM_PAGE_TO_PHYS(m));	/* YYY */
			bzero((caddr_t) zkva, PAGE_SIZE);
			z->zpagecount++;
			zone_kmem_pages++;
			vmstats.v_wire_count++;
		}
		nitems = ((z->zpagecount * PAGE_SIZE) - nbytes) / z->zsize;
	} else {
		nbytes = z->zalloc * PAGE_SIZE;

		/*
		 * Check to see if the kernel map is already locked.  We
		 * could allow recursive locks, but that would eliminate a
		 * valuable debugging mechanism and would open up the kernel
		 * map to potential corruption by inconsistent data structure
		 * manipulation.  We could also use the interrupt allocation
		 * mechanism, but that has size limitations.  Luckily, we
		 * have kmem_map, a submap of the kernel map, available for
		 * memory allocation; manipulating it does not affect the
		 * kernel map's own structures.
		 *
		 * We can wait, so just do normal map allocation in the
		 * appropriate map.
		 */
		if (lockstatus(&kernel_map->lock, NULL)) {
			int s;
			s = splvm();
			item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK);
			lwkt_regettoken(&z->zlock);
			if (item != NULL)
				zone_kmem_pages += z->zalloc;
			splx(s);
		} else {
			item = (void *) kmem_alloc(kernel_map, nbytes);
			lwkt_regettoken(&z->zlock);
			if (item != NULL)
				zone_kern_pages += z->zalloc;
		}
		if (item != NULL) {
			bzero(item, nbytes);
		} else {
			nbytes = 0;
		}
		nitems = nbytes / z->zsize;
	}
	z->ztotal += nitems;

	/*
	 * Save one for immediate allocation.
	 */
	if (nitems != 0) {
		nitems -= 1;
		for (i = 0; i < nitems; i++) {
			((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
			((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
			z->zitems = item;
			item = (char *) item + z->zsize;
		}
		z->zfreecnt += nitems;
		z->znalloc++;
	} else if (z->zfreecnt > 0) {
		item = z->zitems;
		z->zitems = ((void **) item)[0];
#ifdef INVARIANTS
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zfreecnt--;
		z->znalloc++;
	} else {
		item = NULL;
	}

	return item;
}

static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	vm_zone_t curzone, nextzone;
	char tmpbuf[128];
	char tmpname[14];

	snprintf(tmpbuf, sizeof(tmpbuf),
	    "\nITEM            SIZE     LIMIT    USED    FREE  REQUESTS\n");
	error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf));
	if (error)
		return (error);

	for (curzone = zlist; curzone; curzone = nextzone) {
		int i;
		int len;
		int offset;

		nextzone = curzone->znext;
		len = strlen(curzone->zname);
		if (len >= (sizeof(tmpname) - 1))
			len = (sizeof(tmpname) - 1);
		for (i = 0; i < sizeof(tmpname) - 1; i++)
			tmpname[i] = ' ';
		tmpname[i] = 0;
		memcpy(tmpname, curzone->zname, len);
		tmpname[len] = ':';
		offset = 0;
		if (curzone == zlist) {
			offset = 1;
			tmpbuf[0] = '\n';
		}

		snprintf(tmpbuf + offset, sizeof(tmpbuf) - offset,
		    "%s %6.6u, %8.8u, %6.6u, %6.6u, %8.8u\n",
		    tmpname, curzone->zsize, curzone->zmax,
		    (curzone->ztotal - curzone->zfreecnt),
		    curzone->zfreecnt, curzone->znalloc);

		len = strlen((char *)tmpbuf);
		if (nextzone == NULL)
			tmpbuf[len - 1] = 0;

		error = SYSCTL_OUT(req, tmpbuf, len);

		if (error)
			return (error);
	}
	return (0);
}

#if defined(INVARIANTS)
void
zerror(int error)
{
	char *msg;

	switch (error) {
	case ZONE_ERROR_INVALID:
		msg = "zone: invalid zone";
		break;
	case ZONE_ERROR_NOTFREE:
		msg = "zone: entry not free";
		break;
	case ZONE_ERROR_ALREADYFREE:
		msg = "zone: freeing free entry";
		break;
	default:
		msg = "zone: invalid error";
		break;
	}
	panic(msg);
}
#endif

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_vm_zone, "A", "Zone Info");

SYSCTL_INT(_vm, OID_AUTO, zone_kmem_pages,
    CTLFLAG_RD, &zone_kmem_pages, 0, "Number of interrupt-safe pages allocated by zone");
SYSCTL_INT(_vm, OID_AUTO, zone_kmem_kvaspace,
    CTLFLAG_RD, &zone_kmem_kvaspace, 0, "KVA space allocated by zone");
SYSCTL_INT(_vm, OID_AUTO, zone_kern_pages,
    CTLFLAG_RD, &zone_kern_pages, 0, "Number of non-interrupt-safe pages allocated by zone");