/*
 * Copyright (c) 1997, 1998 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 *
 * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $
 * $DragonFly: src/sys/vm/vm_zone.c,v 1.28 2008/01/23 17:35:48 nth Exp $
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#include <sys/spinlock2.h>
#include <sys/mplock2.h>

static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header");

#define	ZONE_ERROR_INVALID	0
#define	ZONE_ERROR_NOTFREE	1
#define	ZONE_ERROR_ALREADYFREE	2

#define	ZONE_ROUNDING	32

#define	ZENTRY_FREE	0x12342378

static void *zget(vm_zone_t z);

/*
 * Return an item from the specified zone.  This function is non-blocking for
 * ZONE_INTERRUPT zones.
 */
void *
zalloc(vm_zone_t z)
{
	void *item;

#ifdef INVARIANTS
	if (z == NULL)
		zerror(ZONE_ERROR_INVALID);
#endif
	spin_lock_wr(&z->zlock);
	if (z->zfreecnt > z->zfreemin) {
		item = z->zitems;
#ifdef INVARIANTS
		KASSERT(item != NULL, ("zitems unexpectedly NULL"));
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zitems = ((void **) item)[0];
		z->zfreecnt--;
		z->znalloc++;
		spin_unlock_wr(&z->zlock);
	} else {
		spin_unlock_wr(&z->zlock);
		item = zget(z);
		/*
		 * PANICFAIL allows the caller to assume that the zalloc()
		 * will always succeed.  If it doesn't, we panic here.
		 */
		if (item == NULL && (z->zflags & ZONE_PANICFAIL))
			panic("zalloc(%s) failed", z->zname);
	}
	return item;
}

/*
 * Free an item to the specified zone.
 */
void
zfree(vm_zone_t z, void *item)
{
	spin_lock_wr(&z->zlock);
	((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
	if (((void **) item)[1] == (void *) ZENTRY_FREE)
		zerror(ZONE_ERROR_ALREADYFREE);
	((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
	z->zitems = item;
	z->zfreecnt++;
	spin_unlock_wr(&z->zlock);
}

/*
 * This file comprises a very simple zone allocator.  This is used
 * in lieu of the malloc allocator, where needed or more optimal.
 *
 * Note that the initial implementation of this had coloring, and
 * absolutely no improvement (actually perf degradation) occurred.
 *
 * Note also that the zones are type stable.  The only restriction is
 * that the first two longwords of a data structure can be changed
 * between allocations.  Any data that must be stable between allocations
 * must reside in areas after the first two longwords.
 *
 * zinitna, zinit, zbootinit are the initialization routines.
 * zalloc, zfree, are the allocation/free routines.
 */
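/*
 * Illustrative sketch (not part of this file's interface): a typical
 * consumer creates a zone once at initialization time and then allocates
 * and frees fixed-size records from it.  The structure and zone names
 * below are hypothetical.
 *
 *	struct foo_record {
 *		void	*fr_link[2];	// first two longwords may be
 *					// overwritten while the item is free
 *		int	fr_data;	// stable, type-stable payload
 *	};
 *	static vm_zone_t foo_zone;
 *
 *	foo_zone = zinit("foorec", sizeof(struct foo_record), 0, 0, 1);
 *	struct foo_record *fr = zalloc(foo_zone);
 *	...
 *	zfree(foo_zone, fr);
 */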
LIST_HEAD(zlist, vm_zone) zlist = LIST_HEAD_INITIALIZER(zlist);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int zone_kmem_pages, zone_kern_pages, zone_kmem_kvaspace;

/*
 * Create a zone, but don't allocate the zone structure.  If the
 * zone had been previously created by the zone boot code, initialize
 * various parts of the zone code.
 *
 * If waits are not allowed during allocation (e.g. during interrupt
 * code), allocate the kernel virtual space a priori, and allocate
 * only pages when needed.
 *
 * Arguments:
 *	z		pointer to zone structure.
 *	obj		pointer to VM object (opt).
 *	name		name of zone.
 *	size		size of zone entries.
 *	nentries	number of zone entries allocated (only ZONE_INTERRUPT.)
 *	flags		ZONE_INTERRUPT -- items can be allocated at interrupt time.
 *	zalloc		number of pages allocated when memory is needed.
 *
 * Note that when using ZONE_INTERRUPT, the size of the zone is limited
 * by the nentries argument.  The size of the memory allocatable is
 * unlimited if ZONE_INTERRUPT is not set.
 */
int
zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
	int nentries, int flags, int zalloc)
{
	int totsize;

	/*
	 * Only zones created with zinit() are destroyable.
	 */
	if (z->zflags & ZONE_DESTROYABLE)
		panic("zinitna: can't create destroyable zone");

	/*
	 * NOTE: We can only adjust zsize if we previously did not
	 *	 use zbootinit().
	 */
	if ((z->zflags & ZONE_BOOT) == 0) {
		z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1);
		spin_init(&z->zlock);
		z->zfreecnt = 0;
		z->ztotal = 0;
		z->zmax = 0;
		z->zname = name;
		z->znalloc = 0;
		z->zitems = NULL;

		LIST_INSERT_HEAD(&zlist, z, zlink);
	}

	z->zkmvec = NULL;
	z->zkmcur = z->zkmmax = 0;
	z->zflags |= flags;

	/*
	 * If we cannot wait, allocate KVA space up front, and we will fill
	 * in pages as needed.  This is particularly required when creating
	 * an allocation space for map entries in kernel_map, because we
	 * do not want to go into a recursion deadlock with
	 * vm_map_entry_reserve().
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		totsize = round_page(z->zsize * nentries);
		zone_kmem_kvaspace += totsize;

		z->zkva = kmem_alloc_pageable(&kernel_map, totsize);
		if (z->zkva == 0) {
			LIST_REMOVE(z, zlink);
			return 0;
		}

		z->zpagemax = totsize / PAGE_SIZE;
		if (obj == NULL) {
			z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
		} else {
			z->zobj = obj;
			_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
		}
		z->zallocflag = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT;
		z->zmax += nentries;
	} else {
		z->zallocflag = VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM;
		z->zmax = 0;
	}

	if (z->zsize > PAGE_SIZE)
		z->zfreemin = 1;
	else
		z->zfreemin = PAGE_SIZE / z->zsize;

	z->zpagecount = 0;
	if (zalloc)
		z->zalloc = zalloc;
	else
		z->zalloc = 1;

	/*
	 * Populate the interrupt zone at creation time rather than
	 * on first allocation, as this is a potentially long operation.
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		void *buf;

		buf = zget(z);
		zfree(z, buf);
	}

	return 1;
}
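/*
 * Illustrative sketch (hypothetical names, not part of this file): a
 * subsystem that must allocate items from interrupt context typically
 * supplies its own statically allocated zone and object headers and
 * passes ZONE_INTERRUPT, so only page population -- never KVA
 * allocation -- happens after boot.  nentries bounds the zone's size.
 *
 *	static struct vm_zone	foo_zone_store;
 *	static struct vm_object	foo_obj_store;
 *	vm_zone_t foo_zone = &foo_zone_store;
 *
 *	foo_zone->zflags = 0;
 *	zinitna(foo_zone, &foo_obj_store, "foo", sizeof(struct foo),
 *		FOO_MAX_ENTRIES, ZONE_INTERRUPT, 1);
 */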
/*
 * Subroutine same as zinitna, except the zone data structure is allocated
 * automatically by malloc.  This routine should normally be used, except
 * in certain tricky startup conditions in the VM system -- then
 * zbootinit and zinitna can be used.  Zinit is the standard zone
 * initialization call.
 */
vm_zone_t
zinit(char *name, int size, int nentries, int flags, int zalloc)
{
	vm_zone_t z;

	z = (vm_zone_t) kmalloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT);
	if (z == NULL)
		return NULL;

	z->zflags = 0;
	if (zinitna(z, NULL, name, size, nentries,
		    flags & ~ZONE_DESTROYABLE, zalloc) == 0) {
		kfree(z, M_ZONE);
		return NULL;
	}

	if (flags & ZONE_DESTROYABLE)
		z->zflags |= ZONE_DESTROYABLE;

	return z;
}

/*
 * Initialize a zone before the system is fully up.  This routine should
 * only be called before full VM startup.
 */
void
zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems)
{
	int i;

	z->zname = name;
	z->zsize = size;
	z->zpagemax = 0;
	z->zobj = NULL;
	z->zflags = ZONE_BOOT;
	z->zfreemin = 0;
	z->zallocflag = 0;
	z->zpagecount = 0;
	z->zalloc = 0;
	z->znalloc = 0;
	spin_init(&z->zlock);

	bzero(item, nitems * z->zsize);
	z->zitems = NULL;
	for (i = 0; i < nitems; i++) {
		((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
		((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
		z->zitems = item;
		item = (uint8_t *)item + z->zsize;
	}
	z->zfreecnt = nitems;
	z->zmax = nitems;
	z->ztotal = nitems;

	LIST_INSERT_HEAD(&zlist, z, zlink);
}

/*
 * Release all resources owned by a zone created with zinit().
 */
void
zdestroy(vm_zone_t z)
{
	int i;

	if (z == NULL)
		panic("zdestroy: null zone");
	if ((z->zflags & ZONE_DESTROYABLE) == 0)
		panic("zdestroy: undestroyable zone");

	LIST_REMOVE(z, zlink);

	/*
	 * Release virtual mappings, physical memory and update sysctl stats.
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		/*
		 * Pages mapped via pmap_kenter() must be removed from the
		 * kernel_pmap before calling kmem_free() to avoid issues
		 * with kernel_pmap.pm_stats.resident_count.
		 */
		pmap_qremove(z->zkva, z->zpagemax);

		/*
		 * Free the mapping.
		 */
		kmem_free(&kernel_map, z->zkva, z->zpagemax * PAGE_SIZE);
		atomic_subtract_int(&zone_kmem_kvaspace,
		    z->zpagemax * PAGE_SIZE);

		/*
		 * Free the backing object and physical pages.
		 */
		vm_object_deallocate(z->zobj);
		atomic_subtract_int(&zone_kmem_pages, z->zpagecount);
	} else {
		for (i = 0; i < z->zkmcur; i++) {
			kmem_free(&kernel_map, z->zkmvec[i],
			    z->zalloc * PAGE_SIZE);
			atomic_subtract_int(&zone_kern_pages, z->zalloc);
		}
		if (z->zkmvec != NULL)
			kfree(z->zkmvec, M_ZONE);
	}

	spin_uninit(&z->zlock);
	kfree(z, M_ZONE);
}

/*
 * void *zalloc(vm_zone_t zone) --
 *	Returns an item from a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 *
 * void zfree(vm_zone_t zone, void *item) --
 *	Frees an item back to a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 */
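/*
 * Illustrative sketch (hypothetical names): a zone whose backing storage
 * must be returned when its subsystem is torn down is created with
 * ZONE_DESTROYABLE and released with zdestroy().  Only zinit()-created
 * zones may be destroyed.
 *
 *	vm_zone_t bar_zone;
 *
 *	bar_zone = zinit("bar", sizeof(struct bar), 0, ZONE_DESTROYABLE, 4);
 *	...
 *	zdestroy(bar_zone);
 */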
/*
 * Internal zone routine.  Not to be called from external (non vm_zone) code.
 */
static void *
zget(vm_zone_t z)
{
	int i;
	vm_page_t m;
	int nitems, nbytes;
	int savezpc;
	void *item;

	if (z == NULL)
		panic("zget: null zone");

	if (z->zflags & ZONE_INTERRUPT) {
		/*
		 * Interrupt zones do not mess with the kernel_map, they
		 * simply populate an existing mapping.
		 */
		get_mplock();
		savezpc = z->zpagecount;
		nbytes = z->zpagecount * PAGE_SIZE;
		nbytes -= nbytes % z->zsize;
		item = (char *) z->zkva + nbytes;
		for (i = 0; ((i < z->zalloc) && (z->zpagecount < z->zpagemax));
		     i++) {
			vm_offset_t zkva;

			m = vm_page_alloc(z->zobj, z->zpagecount,
					  z->zallocflag);
			/* note: z might be modified due to blocking */
			if (m == NULL)
				break;

			/*
			 * Unbusy the page so it can be freed in zdestroy().
			 * Make sure it is not on any queue and so can not be
			 * recycled under our feet.
			 */
			KKASSERT(m->queue == PQ_NONE);
			vm_page_flag_clear(m, PG_BUSY);

			zkva = z->zkva + z->zpagecount * PAGE_SIZE;
			pmap_kenter(zkva, VM_PAGE_TO_PHYS(m));	/* YYY */
			bzero((void *)zkva, PAGE_SIZE);
			KKASSERT(savezpc == z->zpagecount);
			++savezpc;
			z->zpagecount++;
			zone_kmem_pages++;
			vmstats.v_wire_count++;
		}
		nitems = ((z->zpagecount * PAGE_SIZE) - nbytes) / z->zsize;
		rel_mplock();
	} else if (z->zflags & ZONE_SPECIAL) {
		/*
		 * The special zone is the one used for vm_map_entry_t's.
		 * We have to avoid an infinite recursion in
		 * vm_map_entry_reserve() by using vm_map_entry_kreserve()
		 * instead.  The map entries are pre-reserved by the kernel
		 * by vm_map_entry_reserve_cpu_init().
		 */
		nbytes = z->zalloc * PAGE_SIZE;

		item = (void *)kmem_alloc3(&kernel_map, nbytes, KM_KRESERVE);

		/* note: z might be modified due to blocking */
		if (item != NULL) {
			zone_kern_pages += z->zalloc;	/* not MP-safe XXX */
			bzero(item, nbytes);
		} else {
			nbytes = 0;
		}
		nitems = nbytes / z->zsize;
	} else {
		/*
		 * Otherwise allocate KVA from the kernel_map.
		 */
		nbytes = z->zalloc * PAGE_SIZE;

		item = (void *)kmem_alloc3(&kernel_map, nbytes, 0);

		/* note: z might be modified due to blocking */
		if (item != NULL) {
			zone_kern_pages += z->zalloc;	/* not MP-safe XXX */
			bzero(item, nbytes);

			if (z->zflags & ZONE_DESTROYABLE) {
				if (z->zkmcur == z->zkmmax) {
					z->zkmmax =
					    z->zkmmax == 0 ? 1 : z->zkmmax * 2;
					z->zkmvec = krealloc(z->zkmvec,
					    z->zkmmax * sizeof(z->zkmvec[0]),
					    M_ZONE, M_WAITOK);
				}
				z->zkmvec[z->zkmcur++] = (vm_offset_t)item;
			}
		} else {
			nbytes = 0;
		}
		nitems = nbytes / z->zsize;
	}

	spin_lock_wr(&z->zlock);
	z->ztotal += nitems;

	/*
	 * Save one for immediate allocation
	 */
	if (nitems != 0) {
		nitems -= 1;
		for (i = 0; i < nitems; i++) {
			((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
			((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
			z->zitems = item;
			item = (uint8_t *)item + z->zsize;
		}
		z->zfreecnt += nitems;
		z->znalloc++;
	} else if (z->zfreecnt > 0) {
		item = z->zitems;
		z->zitems = ((void **) item)[0];
#ifdef INVARIANTS
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zfreecnt--;
		z->znalloc++;
	} else {
		item = NULL;
	}
	spin_unlock_wr(&z->zlock);

	/*
	 * A special zone may have used a kernel-reserved vm_map_entry.  If
	 * so we have to be sure to recover our reserve so we don't run out.
	 * We will panic if we run out.
	 */
	if (z->zflags & ZONE_SPECIAL)
		vm_map_entry_reserve(0);

	return item;
}
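/*
 * For reference, the layout of an item while it sits on a zone's free list,
 * as built by zget()/zbootinit() and consumed by zalloc()/zfree().  This is
 * why the first two longwords of an object may not be relied upon across
 * allocations, as noted earlier in this file:
 *
 *	((void **) item)[0]	next free item (or NULL); z->zitems is the head
 *	((void **) item)[1]	ZENTRY_FREE magic (INVARIANTS kernels only)
 *	remaining bytes		untouched; zones are type stable
 */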
If 507 * so we have to be sure to recover our reserve so we don't run out. 508 * We will panic if we run out. 509 */ 510 if (z->zflags & ZONE_SPECIAL) 511 vm_map_entry_reserve(0); 512 513 return item; 514 } 515 516 static int 517 sysctl_vm_zone(SYSCTL_HANDLER_ARGS) 518 { 519 int error=0; 520 vm_zone_t curzone; 521 char tmpbuf[128]; 522 char tmpname[14]; 523 524 ksnprintf(tmpbuf, sizeof(tmpbuf), 525 "\nITEM SIZE LIMIT USED FREE REQUESTS\n"); 526 error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf)); 527 if (error) 528 return (error); 529 530 LIST_FOREACH(curzone, &zlist, zlink) { 531 int i; 532 int len; 533 int offset; 534 535 len = strlen(curzone->zname); 536 if (len >= (sizeof(tmpname) - 1)) 537 len = (sizeof(tmpname) - 1); 538 for(i = 0; i < sizeof(tmpname) - 1; i++) 539 tmpname[i] = ' '; 540 tmpname[i] = 0; 541 memcpy(tmpname, curzone->zname, len); 542 tmpname[len] = ':'; 543 offset = 0; 544 if (curzone == LIST_FIRST(&zlist)) { 545 offset = 1; 546 tmpbuf[0] = '\n'; 547 } 548 549 ksnprintf(tmpbuf + offset, sizeof(tmpbuf) - offset, 550 "%s %6.6u, %8.8u, %6.6u, %6.6u, %8.8u\n", 551 tmpname, curzone->zsize, curzone->zmax, 552 (curzone->ztotal - curzone->zfreecnt), 553 curzone->zfreecnt, curzone->znalloc); 554 555 len = strlen((char *)tmpbuf); 556 if (LIST_NEXT(curzone, zlink) == NULL) 557 tmpbuf[len - 1] = 0; 558 559 error = SYSCTL_OUT(req, tmpbuf, len); 560 561 if (error) 562 return (error); 563 } 564 return (0); 565 } 566 567 #if defined(INVARIANTS) 568 void 569 zerror(int error) 570 { 571 char *msg; 572 573 switch (error) { 574 case ZONE_ERROR_INVALID: 575 msg = "zone: invalid zone"; 576 break; 577 case ZONE_ERROR_NOTFREE: 578 msg = "zone: entry not free"; 579 break; 580 case ZONE_ERROR_ALREADYFREE: 581 msg = "zone: freeing free entry"; 582 break; 583 default: 584 msg = "zone: invalid error"; 585 break; 586 } 587 panic(msg); 588 } 589 #endif 590 591 SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, \ 592 NULL, 0, sysctl_vm_zone, "A", "Zone Info"); 593 594 SYSCTL_INT(_vm, OID_AUTO, zone_kmem_pages, 595 CTLFLAG_RD, &zone_kmem_pages, 0, "Number of interrupt safe pages allocated by zone"); 596 SYSCTL_INT(_vm, OID_AUTO, zone_kmem_kvaspace, 597 CTLFLAG_RD, &zone_kmem_kvaspace, 0, "KVA space allocated by zone"); 598 SYSCTL_INT(_vm, OID_AUTO, zone_kern_pages, 599 CTLFLAG_RD, &zone_kern_pages, 0, "Number of non-interrupt safe pages allocated by zone"); 600