/*	$OpenBSD: uvm_km.c,v 1.106 2011/07/03 18:34:14 oga Exp $	*/
/*	$NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_kern.c	8.3 (Berkeley) 1/12/94
 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_km.c: handle kernel memory allocation and management
 */

/*
 * overview of kernel memory management:
 *
 * the kernel virtual address space is mapped by "kernel_map."   kernel_map
 * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS.
 * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map).
 *
 * the kernel_map has several "submaps."   submaps can only appear in
 * the kernel_map (user processes can't use them).   submaps "take over"
 * the management of a sub-range of the kernel's address space.  submaps
 * are typically allocated at boot time and are never released.   kernel
 * virtual address space that is mapped by a submap is locked by the
 * submap's lock -- not the kernel_map's lock.
 *
 * thus, the useful feature of submaps is that they allow us to break
 * up the locking and protection of the kernel address space into smaller
 * chunks.
 *
 * The VM system has several standard kernel submaps:
 *  kmem_map: Contains only wired kernel memory for malloc(9).
 *	      Note: All access to this map must be protected by splvm as
 *	      calls to malloc(9) are allowed in interrupt handlers.
 *  exec_map: Memory to hold arguments to system calls is allocated from
 *	      this map.
 *	      XXX: This is primarily used to artificially limit the number
 *	      of concurrent processes doing an exec.
 *  phys_map: Buffers for vmapbuf (physio) are allocated from this map.
 *
 * the kernel allocates its private memory out of special uvm_objects whose
 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
 * are "special" and never die).   all kernel objects should be thought of
 * as large, fixed-sized, sparsely populated uvm_objects.   each kernel
 * object is equal to the size of kernel virtual address space (i.e. the
 * value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS").
 *
 * most kernel private memory lives in kernel_object.   the only exception
 * to this is for memory that belongs to submaps that must be protected
 * by splvm().  each of these submaps manages its own pages.
 *
 * note that just because a kernel object spans the entire kernel virtual
 * address space doesn't mean that it has to be mapped into the entire space.
 * large chunks of a kernel object's space go unused either because
 * that area of kernel VM is unmapped, or there is some other type of
 * object mapped into that range (e.g. a vnode).   for submaps' kernel
 * objects, the only part of the object that can ever be populated is the
 * offsets that are managed by the submap.
 *
 * note that the "offset" in a kernel object is always the kernel virtual
 * address minus the VM_MIN_KERNEL_ADDRESS (aka vm_map_min(kernel_map)).
 * example:
 *   suppose VM_MIN_KERNEL_ADDRESS is 0xf8000000 and the kernel does a
 *   uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
 *   kernel map].   if uvm_km_alloc returns virtual address 0xf8235000,
 *   then that means that the page at offset 0x235000 in kernel_object is
 *   mapped at 0xf8235000.
 *
 * kernel objects have one other special property: when the kernel virtual
 * memory mapping them is unmapped, the backing memory in the object is
 * freed right away.   this is done with the uvm_km_pgremove() function.
 * this has to be done because there is no backing store for kernel pages
 * and no need to save them after they are no longer referenced.
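 *
 * as an illustrative sketch of that translation (not code taken from this
 * file), converting a kernel virtual address "va" to its kernel_object
 * offset and back is plain arithmetic:
 *
 *	voff_t  off = (voff_t)(va - vm_map_min(kernel_map));
 *	vaddr_t kva = vm_map_min(kernel_map) + (vaddr_t)off;
 *
 * with the example above, off = 0xf8235000 - 0xf8000000 = 0x235000.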
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kthread.h>

#include <uvm/uvm.h>

/*
 * global data structures
 */

struct vm_map *kernel_map = NULL;

/* Unconstrained range. */
struct uvm_constraint_range	no_constraint = { 0x0, (paddr_t)-1 };

/*
 * local data structures
 */

static struct vm_map		kernel_map_store;

/*
 * uvm_km_init: init kernel maps and objects to reflect reality (i.e.
 * KVM already allocated for text, data, bss, and static data structures).
 *
 * => KVM is defined by VM_MIN_KERNEL_ADDRESS/VM_MAX_KERNEL_ADDRESS.
 *    we assume that [min -> start] has already been allocated and that
 *    "end" is the end.
 */

void
uvm_km_init(vaddr_t start, vaddr_t end)
{
	vaddr_t base = VM_MIN_KERNEL_ADDRESS;

	/*
	 * next, init kernel memory objects.
	 */

	/* kernel_object: for pageable anonymous kernel memory */
	uao_init();
	uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS -
	    VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ);

	/*
	 * init the map and reserve already allocated kernel space
	 * before installing.
	 */

	uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE);
	kernel_map_store.pmap = pmap_kernel();
	if (base != start && uvm_map(&kernel_map_store, &base, start - base,
	    NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
	    UVM_INH_NONE, UVM_ADV_RANDOM, UVM_FLAG_FIXED)) != 0)
		panic("uvm_km_init: could not reserve space for kernel");

	/*
	 * install!
	 */

	kernel_map = &kernel_map_store;
}

/*
 * uvm_km_suballoc: allocate a submap in the kernel map.   once a submap
 * is allocated all references to that area of VM must go through it.  this
 * allows the locking of VAs in kernel_map to be broken up into regions.
 *
 * => if `fixed' is true, *min specifies where the region described
 *	by the submap must start
 * => if submap is non NULL we use that as the submap, otherwise we
 *	alloc a new map
 */
struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size,
    int flags, boolean_t fixed, struct vm_map *submap)
{
	int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0);

	size = round_page(size);	/* round up to pagesize */

	/*
	 * first allocate a blank spot in the parent map
	 */

	if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
	    UVM_ADV_RANDOM, mapflags)) != 0) {
		panic("uvm_km_suballoc: unable to allocate space in parent map");
	}

	/*
	 * set VM bounds (min is filled in by uvm_map)
	 */

	*max = *min + size;

	/*
	 * add references to pmap and create or init the submap
	 */

	pmap_reference(vm_map_pmap(map));
	if (submap == NULL) {
		submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags);
		if (submap == NULL)
			panic("uvm_km_suballoc: unable to create submap");
	} else {
		uvm_map_setup(submap, *min, *max, flags);
		submap->pmap = vm_map_pmap(map);
	}

	/*
	 * now let uvm_map_submap plug it in...
	 */

	if (uvm_map_submap(map, *min, *max, submap) != 0)
		panic("uvm_km_suballoc: submap allocation failed");

	return(submap);
}
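
/*
 * Illustrative sketch only (not code from this file): machine-dependent
 * startup code typically carves an interrupt-safe submap out of kernel_map
 * roughly like this; the 16MB size and the NULL "create a fresh map"
 * argument are assumptions for the example.
 *
 *	vaddr_t kmem_min, kmem_max;
 *
 *	kmem_map = uvm_km_suballoc(kernel_map, &kmem_min, &kmem_max,
 *	    16 * 1024 * 1024, VM_MAP_INTRSAFE, FALSE, NULL);
 *
 * After the call, allocations in [kmem_min, kmem_max) are serialized by
 * kmem_map's own lock instead of kernel_map's.
 */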
/*
 * uvm_km_pgremove: remove pages from a kernel uvm_object.
 *
 * => when you unmap a part of anonymous kernel memory you want to toss
 *    the pages right away.    (this gets called from uvm_unmap_...).
 */
void
uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end)
{
	struct vm_page *pp;
	voff_t curoff;
	int slot;

	KASSERT(uobj->pgops == &aobj_pager);

	for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
		pp = uvm_pagelookup(uobj, curoff);
		if (pp && pp->pg_flags & PG_BUSY) {
			atomic_setbits_int(&pp->pg_flags, PG_WANTED);
			UVM_UNLOCK_AND_WAIT(pp, &uobj->vmobjlock, 0,
			    "km_pgrm", 0);
			simple_lock(&uobj->vmobjlock);
			curoff -= PAGE_SIZE; /* loop back to us */
			continue;
		}

		/* free the swap slot, then the page */
		slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT);

		if (pp != NULL) {
			uvm_lock_pageq();
			uvm_pagefree(pp);
			uvm_unlock_pageq();
		} else if (slot != 0) {
			simple_lock(&uvm.swap_data_lock);
			uvmexp.swpgonly--;
			simple_unlock(&uvm.swap_data_lock);
		}
	}
}


/*
 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe"
 * objects
 *
 * => when you unmap a part of anonymous kernel memory you want to toss
 *    the pages right away.    (this gets called from uvm_unmap_...).
 * => none of the pages will ever be busy, and none of them will ever
 *    be on the active or inactive queues (because these objects are
 *    never allowed to "page").
 */

void
uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end)
{
	struct vm_page *pg;
	vaddr_t va;
	paddr_t pa;

	for (va = start; va < end; va += PAGE_SIZE) {
		if (!pmap_extract(pmap_kernel(), va, &pa))
			continue;
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg == NULL)
			panic("uvm_km_pgremove_intrsafe: no page");
		uvm_pagefree(pg);
	}
}
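
/*
 * Illustrative sketch only: when a range of kernel_object-backed virtual
 * memory is unmapped, the uvm_unmap machinery (in uvm_map.c, not shown
 * here) discards the backing pages with a call along the lines of
 *
 *	uvm_km_pgremove(uvm.kernel_object,
 *	    sva - vm_map_min(kernel_map), eva - vm_map_min(kernel_map));
 *
 * where "sva" and "eva" stand for the hypothetical start and end of the
 * unmapped range, i.e. the VA range is translated into kernel_object
 * offsets first.
 */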
/*
 * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
 *
 * => we map wired memory into the specified map using the obj passed in
 * => NOTE: we can return NULL even if we can wait if there is not enough
 *	free VM space in the map... caller should be prepared to handle
 *	this case.
 * => we return KVA of memory allocated
 * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't
 *	lock the map
 * => low, high, alignment, boundary, nsegs are the corresponding parameters
 *	to uvm_pglistalloc
 * => flags: ZERO - corresponds to uvm_pglistalloc flags
 */

vaddr_t
uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size,
    vsize_t valign, int flags, paddr_t low, paddr_t high, paddr_t alignment,
    paddr_t boundary, int nsegs)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;
	struct pglist pgl;
	int pla_flags;

	KASSERT(vm_map_pmap(map) == pmap_kernel());
	/* UVM_KMF_VALLOC => !UVM_KMF_ZERO */
	KASSERT(!(flags & UVM_KMF_VALLOC) ||
	    !(flags & UVM_KMF_ZERO));

	/*
	 * setup for call
	 */

	size = round_page(size);
	kva = vm_map_min(map);	/* hint */
	if (nsegs == 0)
		nsegs = atop(size);

	/*
	 * allocate some virtual space
	 */

	if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
	    valign, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
	    UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) {
		return(0);
	}

	/*
	 * if all we wanted was VA, return now
	 */

	if (flags & UVM_KMF_VALLOC) {
		return(kva);
	}

	/*
	 * recover object offset from virtual address
	 */

	if (obj != NULL)
		offset = kva - vm_map_min(kernel_map);
	else
		offset = 0;

	/*
	 * now allocate and map in the memory... note that we are the only ones
	 * who should ever get a handle on this area of VM.
	 */
	TAILQ_INIT(&pgl);
	pla_flags = 0;
	if ((flags & UVM_KMF_NOWAIT) ||
	    ((flags & UVM_KMF_CANFAIL) &&
	    uvmexp.swpages - uvmexp.swpgonly <= atop(size)))
		pla_flags |= UVM_PLA_NOWAIT;
	else
		pla_flags |= UVM_PLA_WAITOK;
	if (flags & UVM_KMF_ZERO)
		pla_flags |= UVM_PLA_ZERO;
	if (uvm_pglistalloc(size, low, high, alignment, boundary, &pgl, nsegs,
	    pla_flags) != 0) {
		/* Failed. */
		uvm_unmap(map, kva, kva + size);
		return (0);
	}

	loopva = kva;
	while (loopva != kva + size) {
		pg = TAILQ_FIRST(&pgl);
		TAILQ_REMOVE(&pgl, pg, pageq);
		uvm_pagealloc_pg(pg, obj, offset, NULL);
		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
		UVM_PAGE_OWN(pg, NULL);

		/*
		 * map it in: note that we call pmap_enter with the map and
		 * object unlocked in case we are kmem_map.
		 */

		if (obj == NULL) {
			pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
			    UVM_PROT_RW);
		} else {
			pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
			    UVM_PROT_RW,
			    PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);
		}
		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	KASSERT(TAILQ_EMPTY(&pgl));
	pmap_update(pmap_kernel());

	return(kva);
}

/*
 * uvm_km_free: free an area of kernel memory
 */

void
uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size)
{
	uvm_unmap(map, trunc_page(addr), round_page(addr+size));
}
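
/*
 * Illustrative sketch only: a wired, zeroed, physically unconstrained
 * page allocated through the _pla interface would look roughly like this
 * (most callers go through the uvm_km_kmemalloc*() wrappers instead, as
 * uvm_km_page_init() below does):
 *
 *	vaddr_t va;
 *
 *	va = uvm_km_kmemalloc_pla(kmem_map, NULL, PAGE_SIZE, 0,
 *	    UVM_KMF_NOWAIT | UVM_KMF_ZERO,
 *	    no_constraint.ucr_low, no_constraint.ucr_high, 0, 0, 0);
 *	if (va == 0)
 *		return (ENOMEM);
 *	...
 *	uvm_km_free(kmem_map, va, PAGE_SIZE);
 *
 * Passing obj == NULL selects the pmap_kenter_pa() path, which is what
 * the interrupt-safe kmem_map wants.
 */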
/*
 * uvm_km_free_wakeup: free an area of kernel memory and wake up
 * anyone waiting for vm space.
 *
 * => XXX: "wanted" bit + unlock&wait on other end?
 */

void
uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size)
{
	struct vm_map_entry *dead_entries;

	vm_map_lock(map);
	uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size),
	    &dead_entries, NULL, FALSE);
	wakeup(map);
	vm_map_unlock(map);

	if (dead_entries != NULL)
		uvm_unmap_detach(dead_entries, 0);
}

/*
 * uvm_km_alloc1: allocate wired down memory in the kernel map.
 *
 * => we can sleep if needed
 */

vaddr_t
uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit)
{
	vaddr_t kva, loopva;
	voff_t offset;
	struct vm_page *pg;

	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space
	 */

	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
	    UVM_INH_NONE, UVM_ADV_RANDOM, 0)) != 0)) {
		return(0);
	}

	/*
	 * recover object offset from virtual address
	 */

	offset = kva - vm_map_min(kernel_map);

	/*
	 * now allocate the memory.  we must be careful about released pages.
	 */

	loopva = kva;
	while (size) {
		simple_lock(&uvm.kernel_object->vmobjlock);
		/* allocate ram */
		pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0);
		if (pg) {
			atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
		simple_unlock(&uvm.kernel_object->vmobjlock);
		if (__predict_false(pg == NULL)) {
			if (curproc == uvm.pagedaemon_proc) {
				/*
				 * It is unfeasible for the page daemon to
				 * sleep for memory, so free what we have
				 * allocated and fail.
				 */
				uvm_unmap(map, kva, loopva - kva);
				return (0);
			} else {
				uvm_wait("km_alloc1w");	/* wait for memory */
				continue;
			}
		}

		/*
		 * map it in; note we're never called with an intrsafe
		 * object, so we always use regular old pmap_enter().
		 */
		pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
		    UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE);

		loopva += PAGE_SIZE;
		offset += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_update(map->pmap);

	/*
	 * zero on request (note that "size" is now zero due to the above loop
	 * so we need to subtract kva from loopva to reconstruct the size).
	 */

	if (zeroit)
		memset((caddr_t)kva, 0, loopva - kva);

	return(kva);
}

/*
 * uvm_km_valloc: allocate zero-fill memory in the kernel's address space
 *
 * => memory is not allocated until fault time
 */

vaddr_t
uvm_km_valloc(struct vm_map *map, vsize_t size)
{
	return(uvm_km_valloc_align(map, size, 0, 0));
}

vaddr_t
uvm_km_valloc_try(struct vm_map *map, vsize_t size)
{
	return(uvm_km_valloc_align(map, size, 0, UVM_FLAG_TRYLOCK));
}

vaddr_t
uvm_km_valloc_align(struct vm_map *map, vsize_t size, vsize_t align, int flags)
{
	vaddr_t kva;

	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	kva = vm_map_min(map);		/* hint */

	/*
	 * allocate some virtual space.  will be demand filled by kernel_object.
	 */

	if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object,
	    UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
	    UVM_INH_NONE, UVM_ADV_RANDOM, flags)) != 0)) {
		return(0);
	}

	return(kva);
}

/*
 * uvm_km_valloc_wait: allocate zero-fill memory in the kernel's address space
 *
 * => memory is not allocated until fault time
 * => if no room in map, wait for space to free, unless requested size
 *    is larger than map (in which case we return 0)
 */

vaddr_t
uvm_km_valloc_prefer_wait(struct vm_map *map, vsize_t size, voff_t prefer)
{
	vaddr_t kva;

	KASSERT(vm_map_pmap(map) == pmap_kernel());

	size = round_page(size);
	if (size > vm_map_max(map) - vm_map_min(map))
		return(0);

	while (1) {
		kva = vm_map_min(map);		/* hint */

		/*
		 * allocate some virtual space.   will be demand filled
		 * by kernel_object.
		 */

		if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object,
		    prefer, 0, UVM_MAPFLAG(UVM_PROT_ALL,
		    UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) == 0)) {
			return(kva);
		}

		/*
		 * failed.  sleep for a while (on map)
		 */

		tsleep((caddr_t)map, PVM, "vallocwait", 0);
	}
	/*NOTREACHED*/
}

vaddr_t
uvm_km_valloc_wait(struct vm_map *map, vsize_t size)
{
	return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET);
}

#if defined(__HAVE_PMAP_DIRECT)
/*
 * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch
 * On architectures with machine memory direct mapped into a portion
 * of KVM, we have very little work to do.  Just get a physical page,
 * and find and return its VA.
 */
void
uvm_km_page_init(void)
{
	/* nothing */
}

#else
/*
 * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs
 * This is a special allocator that uses a reserve of free pages
 * to fulfill requests.  It is fast and interrupt safe, but can only
 * return page sized regions.  Its primary use is as a backend for pool.
 *
 * The memory returned is allocated from the larger kernel_map, sparing
 * pressure on the small interrupt-safe kmem_map.  It is wired, but
 * not zero filled.
 */

struct uvm_km_pages uvm_km_pages;

void uvm_km_createthread(void *);
void uvm_km_thread(void *);
struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *);

/*
 * Allocate the initial reserve, and create the thread which will
 * keep the reserve full.  For bootstrapping, we allocate more than
 * the lowat amount, because it may be a while before the thread is
 * running.
 */
void
uvm_km_page_init(void)
{
	int	lowat_min;
	int	i;

	mtx_init(&uvm_km_pages.mtx, IPL_VM);
	if (!uvm_km_pages.lowat) {
		/* based on physmem, calculate a good value here */
		uvm_km_pages.lowat = physmem / 256;
		lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128;
		if (uvm_km_pages.lowat < lowat_min)
			uvm_km_pages.lowat = lowat_min;
	}
	if (uvm_km_pages.lowat > UVM_KM_PAGES_LOWAT_MAX)
		uvm_km_pages.lowat = UVM_KM_PAGES_LOWAT_MAX;
	uvm_km_pages.hiwat = 4 * uvm_km_pages.lowat;
	if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX)
		uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX;

	for (i = 0; i < uvm_km_pages.hiwat; i++) {
		uvm_km_pages.page[i] = (vaddr_t)uvm_km_kmemalloc(kernel_map,
		    NULL, PAGE_SIZE, UVM_KMF_NOWAIT|UVM_KMF_VALLOC);
		if (uvm_km_pages.page[i] == 0)
			break;
	}
	uvm_km_pages.free = i;
	for ( ; i < UVM_KM_PAGES_HIWAT_MAX; i++)
		uvm_km_pages.page[i] = 0;

	/* tone down if really high */
	if (uvm_km_pages.lowat > 512)
		uvm_km_pages.lowat = 512;

	kthread_create_deferred(uvm_km_createthread, NULL);
}

void
uvm_km_createthread(void *arg)
{
	kthread_create(uvm_km_thread, NULL, &uvm_km_pages.km_proc, "kmthread");
}

/*
 * Endless loop.  We grab pages in increments of 16 pages, then
 * quickly swap them into the list.  At some point we can consider
 * returning memory to the system if we have too many free pages,
 * but that's not implemented yet.
 */
void
uvm_km_thread(void *arg)
{
	vaddr_t pg[16];
	int i;
	int allocmore = 0;
	struct uvm_km_free_page *fp = NULL;

	for (;;) {
		mtx_enter(&uvm_km_pages.mtx);
		if (uvm_km_pages.free >= uvm_km_pages.lowat &&
		    uvm_km_pages.freelist == NULL) {
			msleep(&uvm_km_pages.km_proc, &uvm_km_pages.mtx,
			    PVM, "kmalloc", 0);
		}
		allocmore = uvm_km_pages.free < uvm_km_pages.lowat;
		fp = uvm_km_pages.freelist;
		uvm_km_pages.freelist = NULL;
		uvm_km_pages.freelistlen = 0;
		mtx_leave(&uvm_km_pages.mtx);

		if (allocmore) {
			for (i = 0; i < nitems(pg); i++) {
				pg[i] = (vaddr_t)uvm_km_kmemalloc(kernel_map,
				    NULL, PAGE_SIZE, UVM_KMF_VALLOC);
			}

			mtx_enter(&uvm_km_pages.mtx);
			for (i = 0; i < nitems(pg); i++) {
				if (uvm_km_pages.free ==
				    nitems(uvm_km_pages.page))
					break;
				else
					uvm_km_pages.page[uvm_km_pages.free++]
					    = pg[i];
			}
			wakeup(&uvm_km_pages.free);
			mtx_leave(&uvm_km_pages.mtx);

			/* Cleanup left-over pages (if any). */
			for (; i < nitems(pg); i++)
				uvm_km_free(kernel_map, pg[i], PAGE_SIZE);
		}
		while (fp) {
			fp = uvm_km_doputpage(fp);
		}
	}
}

struct uvm_km_free_page *
uvm_km_doputpage(struct uvm_km_free_page *fp)
{
	vaddr_t va = (vaddr_t)fp;
	struct vm_page *pg;
	int	freeva = 1;
	struct uvm_km_free_page *nextfp = fp->next;

	pg = uvm_atopg(va);

	pmap_kremove(va, PAGE_SIZE);
	pmap_update(kernel_map->pmap);

	mtx_enter(&uvm_km_pages.mtx);
	if (uvm_km_pages.free < uvm_km_pages.hiwat) {
		uvm_km_pages.page[uvm_km_pages.free++] = va;
		freeva = 0;
	}
	mtx_leave(&uvm_km_pages.mtx);

	if (freeva)
		uvm_km_free(kernel_map, va, PAGE_SIZE);

	uvm_pagefree(pg);
	return (nextfp);
}
#endif	/* !__HAVE_PMAP_DIRECT */

void *
km_alloc(size_t sz, const struct kmem_va_mode *kv,
    const struct kmem_pa_mode *kp, const struct kmem_dyn_mode *kd)
{
	struct vm_map *map;
	struct vm_page *pg;
	struct pglist pgl;
	int mapflags = 0;
	vm_prot_t prot;
	int pla_flags;
	int pla_maxseg;
#ifdef __HAVE_PMAP_DIRECT
	paddr_t pa;
#endif
	vaddr_t va, sva;

	KASSERT(sz == round_page(sz));

	TAILQ_INIT(&pgl);

	if (kp->kp_nomem || kp->kp_pageable)
		goto alloc_va;

	pla_flags = kd->kd_waitok ? UVM_PLA_WAITOK : UVM_PLA_NOWAIT;
	pla_flags |= UVM_PLA_TRYCONTIG;
	if (kp->kp_zero)
		pla_flags |= UVM_PLA_ZERO;

	pla_maxseg = kp->kp_maxseg;
	if (pla_maxseg == 0)
		pla_maxseg = sz / PAGE_SIZE;

	if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low,
	    kp->kp_constraint->ucr_high, kp->kp_align, kp->kp_boundary,
	    &pgl, pla_maxseg, pla_flags)) {
		return (NULL);
	}

#ifdef __HAVE_PMAP_DIRECT
	if (kv->kv_align || kv->kv_executable)
		goto alloc_va;
#if 1
	/*
	 * For now, only do DIRECT mappings for single page
	 * allocations, until we figure out a good way to deal
	 * with contig allocations in km_free.
	 */
	if (!kv->kv_singlepage)
		goto alloc_va;
#endif
	/*
	 * Dubious optimization.  If we got a contig segment, just map it
	 * through the direct map.
	 */
	TAILQ_FOREACH(pg, &pgl, pageq) {
		if (pg != TAILQ_FIRST(&pgl) &&
		    VM_PAGE_TO_PHYS(pg) != pa + PAGE_SIZE)
			break;
		pa = VM_PAGE_TO_PHYS(pg);
	}
	if (pg == NULL) {
		TAILQ_FOREACH(pg, &pgl, pageq) {
			vaddr_t v;
			v = pmap_map_direct(pg);
			if (pg == TAILQ_FIRST(&pgl))
				va = v;
		}
		return ((void *)va);
	}
#endif
alloc_va:
	if (kv->kv_executable) {
		prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
	} else {
		prot = VM_PROT_READ | VM_PROT_WRITE;
	}

	if (kp->kp_pageable) {
		KASSERT(kp->kp_object);
		KASSERT(!kv->kv_singlepage);
	} else {
		KASSERT(kp->kp_object == NULL);
	}

	if (kv->kv_singlepage) {
		KASSERT(sz == PAGE_SIZE);
#ifdef __HAVE_PMAP_DIRECT
		panic("km_alloc: DIRECT single page");
#else
		mtx_enter(&uvm_km_pages.mtx);
		while (uvm_km_pages.free == 0) {
			if (kd->kd_waitok == 0) {
				mtx_leave(&uvm_km_pages.mtx);
				uvm_pglistfree(&pgl);
				return NULL;
			}
			msleep(&uvm_km_pages.free, &uvm_km_pages.mtx, PVM,
			    "getpage", 0);
		}
		va = uvm_km_pages.page[--uvm_km_pages.free];
		if (uvm_km_pages.free < uvm_km_pages.lowat &&
		    curproc != uvm_km_pages.km_proc) {
			if (kd->kd_slowdown)
				*kd->kd_slowdown = 1;
			wakeup(&uvm_km_pages.km_proc);
		}
		mtx_leave(&uvm_km_pages.mtx);
#endif
	} else {
		struct uvm_object *uobj = NULL;

		if (kd->kd_trylock)
			mapflags |= UVM_KMF_TRYLOCK;

		if (kp->kp_object)
			uobj = *kp->kp_object;
try_map:
		map = *kv->kv_map;
		va = vm_map_min(map);
		if (uvm_map(map, &va, sz, uobj, kd->kd_prefer,
		    kv->kv_align, UVM_MAPFLAG(prot, prot, UVM_INH_NONE,
		    UVM_ADV_RANDOM, mapflags))) {
			if (kv->kv_wait && kd->kd_waitok) {
				tsleep(map, PVM, "km_allocva", 0);
				goto try_map;
			}
			uvm_pglistfree(&pgl);
			return (NULL);
		}
	}
	sva = va;
	TAILQ_FOREACH(pg, &pgl, pageq) {
		if (kp->kp_pageable)
			pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
			    prot, prot | PMAP_WIRED);
		else
			pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), prot);
		va += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	return ((void *)sva);
}

void
km_free(void *v, size_t sz, const struct kmem_va_mode *kv,
    const struct kmem_pa_mode *kp)
{
	vaddr_t sva, eva, va;
	struct vm_page *pg;
	struct pglist pgl;

	sva = va = (vaddr_t)v;
	eva = va + sz;

	if (kp->kp_nomem) {
		goto free_va;
	}

	if (kv->kv_singlepage) {
#ifdef __HAVE_PMAP_DIRECT
		pg = pmap_unmap_direct(va);
		uvm_pagefree(pg);
#else
		struct uvm_km_free_page *fp = v;
		mtx_enter(&uvm_km_pages.mtx);
		fp->next = uvm_km_pages.freelist;
		uvm_km_pages.freelist = fp;
		if (uvm_km_pages.freelistlen++ > 16)
			wakeup(&uvm_km_pages.km_proc);
		mtx_leave(&uvm_km_pages.mtx);
#endif
		return;
	}

	if (kp->kp_pageable) {
		pmap_remove(pmap_kernel(), sva, eva);
		pmap_update(pmap_kernel());
	} else {
		TAILQ_INIT(&pgl);
		for (va = sva; va < eva; va += PAGE_SIZE) {
			paddr_t pa;

			if (!pmap_extract(pmap_kernel(), va, &pa))
				continue;

			pg = PHYS_TO_VM_PAGE(pa);
			if (pg == NULL) {
				panic("km_free: unmanaged page 0x%lx\n", pa);
			}
			TAILQ_INSERT_TAIL(&pgl, pg, pageq);
		}
		pmap_kremove(sva, sz);
		pmap_update(pmap_kernel());
		uvm_pglistfree(&pgl);
	}
free_va:
	uvm_unmap(*kv->kv_map, sva, eva);
	if (kv->kv_wait)
		wakeup(*kv->kv_map);
}

const struct kmem_va_mode kv_any = {
	.kv_map = &kernel_map,
};

const struct kmem_va_mode kv_intrsafe = {
	.kv_map = &kmem_map,
};

const struct kmem_va_mode kv_page = {
	.kv_singlepage = 1
};

const struct kmem_pa_mode kp_dirty = {
	.kp_constraint = &no_constraint
};

const struct kmem_pa_mode kp_dma = {
	.kp_constraint = &dma_constraint
};

const struct kmem_pa_mode kp_dma_contig = {
	.kp_constraint = &dma_constraint,
	.kp_maxseg = 1
};

const struct kmem_pa_mode kp_dma_zero = {
	.kp_constraint = &dma_constraint,
	.kp_zero = 1
};

const struct kmem_pa_mode kp_zero = {
	.kp_constraint = &no_constraint,
	.kp_zero = 1
};

const struct kmem_pa_mode kp_pageable = {
	.kp_object = &uvm.kernel_object,
	.kp_pageable = 1
	/* XXX - kp_nomem, maybe, but we'll need to fix km_free. */
};

const struct kmem_pa_mode kp_none = {
	.kp_nomem = 1
};

const struct kmem_dyn_mode kd_waitok = {
	.kd_waitok = 1,
	.kd_prefer = UVM_UNKNOWN_OFFSET
};

const struct kmem_dyn_mode kd_nowait = {
	.kd_prefer = UVM_UNKNOWN_OFFSET
};

const struct kmem_dyn_mode kd_trylock = {
	.kd_trylock = 1,
	.kd_prefer = UVM_UNKNOWN_OFFSET
};
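
/*
 * Illustrative usage sketch (not part of this file): a caller that wants
 * a wired, zeroed, DMA-reachable buffer and is allowed to sleep would
 * combine the modes above roughly as follows; "len" is a hypothetical
 * size variable.
 *
 *	void *buf;
 *
 *	buf = km_alloc(round_page(len), &kv_any, &kp_dma_zero, &kd_waitok);
 *	if (buf == NULL)
 *		return (ENOMEM);
 *	...
 *	km_free(buf, round_page(len), &kv_any, &kp_dma_zero);
 *
 * km_alloc() asserts that the size it is handed is already page aligned,
 * hence the round_page().  Single-page consumers use &kv_page instead,
 * and callers that must not sleep pass &kd_nowait or &kd_trylock.
 */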