1 /* $OpenBSD: uvm_km.c,v 1.107 2012/03/09 13:01:29 ariane Exp $ */ 2 /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * Copyright (c) 1991, 1993, The Regents of the University of California. 7 * 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by Charles D. Cranor, 24 * Washington University, the University of California, Berkeley and 25 * its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 * 42 * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 43 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp 44 * 45 * 46 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 47 * All rights reserved. 48 * 49 * Permission to use, copy, modify and distribute this software and 50 * its documentation is hereby granted, provided that both the copyright 51 * notice and this permission notice appear in all copies of the 52 * software, derivative works or modified versions, and any portions 53 * thereof, and that both notices appear in supporting documentation. 54 * 55 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 56 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 57 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 58 * 59 * Carnegie Mellon requests users of this software to return to 60 * 61 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 62 * School of Computer Science 63 * Carnegie Mellon University 64 * Pittsburgh PA 15213-3890 65 * 66 * any improvements or extensions that they make and grant Carnegie the 67 * rights to redistribute these changes. 68 */ 69 70 /* 71 * uvm_km.c: handle kernel memory allocation and management 72 */ 73 74 /* 75 * overview of kernel memory management: 76 * 77 * the kernel virtual address space is mapped by "kernel_map." kernel_map 78 * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS. 
79 * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map). 80 * 81 * the kernel_map has several "submaps." submaps can only appear in 82 * the kernel_map (user processes can't use them). submaps "take over" 83 * the management of a sub-range of the kernel's address space. submaps 84 * are typically allocated at boot time and are never released. kernel 85 * virtual address space that is mapped by a submap is locked by the 86 * submap's lock -- not the kernel_map's lock. 87 * 88 * thus, the useful feature of submaps is that they allow us to break 89 * up the locking and protection of the kernel address space into smaller 90 * chunks. 91 * 92 * The VM system has several standard kernel submaps: 93 * kmem_map: Contains only wired kernel memory for malloc(9). 94 * Note: All access to this map must be protected by splvm as 95 * calls to malloc(9) are allowed in interrupt handlers. 96 * exec_map: Memory to hold arguments to system calls is allocated from 97 * this map. 98 * XXX: This is primarily used to artificially limit the number 99 * of concurrent processes doing an exec. 100 * phys_map: Buffers for vmapbuf (physio) are allocated from this map. 101 * 102 * the kernel allocates its private memory out of special uvm_objects whose 103 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects 104 * are "special" and never die). all kernel objects should be thought of 105 * as large, fixed-sized, sparsely populated uvm_objects. each kernel 106 * object is equal to the size of kernel virtual address space (i.e. the 107 * value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS"). 108 * 109 * most kernel private memory lives in kernel_object. the only exception 110 * to this is for memory that belongs to submaps that must be protected 111 * by splvm(). each of these submaps manages its own pages. 
112 * 113 * note that just because a kernel object spans the entire kernel virtual 114 * address space doesn't mean that it has to be mapped into the entire space. 115 * large chunks of a kernel object's space go unused either because 116 * that area of kernel VM is unmapped, or there is some other type of 117 * object mapped into that range (e.g. a vnode). for submap's kernel 118 * objects, the only part of the object that can ever be populated is the 119 * offsets that are managed by the submap. 120 * 121 * note that the "offset" in a kernel object is always the kernel virtual 122 * address minus the VM_MIN_KERNEL_ADDRESS (aka vm_map_min(kernel_map)). 123 * example: 124 * suppose VM_MIN_KERNEL_ADDRESS is 0xf8000000 and the kernel does a 125 * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the 126 * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000, 127 * then that means that the page at offset 0x235000 in kernel_object is 128 * mapped at 0xf8235000. 129 * 130 * kernel objects have one other special property: when the kernel virtual 131 * memory mapping them is unmapped, the backing memory in the object is 132 * freed right away. this is done with the uvm_km_pgremove() function. 133 * this has to be done because there is no backing store for kernel pages 134 * and no need to save them after they are no longer referenced. 135 */ 136 137 #include <sys/param.h> 138 #include <sys/systm.h> 139 #include <sys/proc.h> 140 #include <sys/kthread.h> 141 #include <uvm/uvm.h> 142 143 /* 144 * global data structures 145 */ 146 147 struct vm_map *kernel_map = NULL; 148 149 /* Unconstrained range. */ 150 struct uvm_constraint_range no_constraint = { 0x0, (paddr_t)-1 }; 151 152 /* 153 * local data structures 154 */ 155 156 static struct vm_map kernel_map_store; 157 158 /* 159 * uvm_km_init: init kernel maps and objects to reflect reality (i.e. 160 * KVM already allocated for text, data, bss, and static data structures). 
161 * 162 * => KVM is defined by VM_MIN_KERNEL_ADDRESS/VM_MAX_KERNEL_ADDRESS. 163 * we assume that [min -> start] has already been allocated and that 164 * "end" is the end. 165 */ 166 167 void 168 uvm_km_init(vaddr_t start, vaddr_t end) 169 { 170 vaddr_t base = VM_MIN_KERNEL_ADDRESS; 171 172 /* 173 * next, init kernel memory objects. 174 */ 175 176 /* kernel_object: for pageable anonymous kernel memory */ 177 uao_init(); 178 uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS - 179 VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ); 180 181 /* 182 * init the map and reserve already allocated kernel space 183 * before installing. 184 */ 185 186 uvm_map_setup(&kernel_map_store, base, end, 187 #ifdef KVA_GUARDPAGES 188 VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES 189 #else 190 VM_MAP_PAGEABLE 191 #endif 192 ); 193 kernel_map_store.pmap = pmap_kernel(); 194 if (base != start && uvm_map(&kernel_map_store, &base, start - base, 195 NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 196 UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != 0) 197 panic("uvm_km_init: could not reserve space for kernel"); 198 199 /* 200 * install! 201 */ 202 203 kernel_map = &kernel_map_store; 204 } 205 206 /* 207 * uvm_km_suballoc: allocate a submap in the kernel map. once a submap 208 * is allocated all references to that area of VM must go through it. this 209 * allows the locking of VAs in kernel_map to be broken up into regions. 210 * 211 * => if `fixed' is true, *min specifies where the region described 212 * by the submap must start 213 * => if submap is non NULL we use that as the submap, otherwise we 214 * alloc a new map 215 */ 216 struct vm_map * 217 uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size, 218 int flags, boolean_t fixed, struct vm_map *submap) 219 { 220 int mapflags = UVM_FLAG_NOMERGE | (fixed ? 
UVM_FLAG_FIXED : 0); 221 222 size = round_page(size); /* round up to pagesize */ 223 224 /* 225 * first allocate a blank spot in the parent map 226 */ 227 228 if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0, 229 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, 230 UVM_ADV_RANDOM, mapflags)) != 0) { 231 panic("uvm_km_suballoc: unable to allocate space in parent map"); 232 } 233 234 /* 235 * set VM bounds (min is filled in by uvm_map) 236 */ 237 238 *max = *min + size; 239 240 /* 241 * add references to pmap and create or init the submap 242 */ 243 244 pmap_reference(vm_map_pmap(map)); 245 if (submap == NULL) { 246 submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags); 247 if (submap == NULL) 248 panic("uvm_km_suballoc: unable to create submap"); 249 } else { 250 uvm_map_setup(submap, *min, *max, flags); 251 submap->pmap = vm_map_pmap(map); 252 } 253 254 /* 255 * now let uvm_map_submap plug in it... 256 */ 257 258 if (uvm_map_submap(map, *min, *max, submap) != 0) 259 panic("uvm_km_suballoc: submap allocation failed"); 260 261 return(submap); 262 } 263 264 /* 265 * uvm_km_pgremove: remove pages from a kernel uvm_object. 266 * 267 * => when you unmap a part of anonymous kernel memory you want to toss 268 * the pages right away. (this gets called from uvm_unmap_...). 
269 */ 270 void 271 uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end) 272 { 273 struct vm_page *pp; 274 voff_t curoff; 275 int slot; 276 277 KASSERT(uobj->pgops == &aobj_pager); 278 279 for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) { 280 pp = uvm_pagelookup(uobj, curoff); 281 if (pp && pp->pg_flags & PG_BUSY) { 282 atomic_setbits_int(&pp->pg_flags, PG_WANTED); 283 UVM_UNLOCK_AND_WAIT(pp, &uobj->vmobjlock, 0, 284 "km_pgrm", 0); 285 simple_lock(&uobj->vmobjlock); 286 curoff -= PAGE_SIZE; /* loop back to us */ 287 continue; 288 } 289 290 /* free the swap slot, then the page */ 291 slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT); 292 293 if (pp != NULL) { 294 uvm_lock_pageq(); 295 uvm_pagefree(pp); 296 uvm_unlock_pageq(); 297 } else if (slot != 0) { 298 simple_lock(&uvm.swap_data_lock); 299 uvmexp.swpgonly--; 300 simple_unlock(&uvm.swap_data_lock); 301 } 302 } 303 } 304 305 306 /* 307 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe" 308 * objects 309 * 310 * => when you unmap a part of anonymous kernel memory you want to toss 311 * the pages right away. (this gets called from uvm_unmap_...). 312 * => none of the pages will ever be busy, and none of them will ever 313 * be on the active or inactive queues (because these objects are 314 * never allowed to "page"). 315 */ 316 317 void 318 uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end) 319 { 320 struct vm_page *pg; 321 vaddr_t va; 322 paddr_t pa; 323 324 for (va = start; va < end; va += PAGE_SIZE) { 325 if (!pmap_extract(pmap_kernel(), va, &pa)) 326 continue; 327 pg = PHYS_TO_VM_PAGE(pa); 328 if (pg == NULL) 329 panic("uvm_km_pgremove_intrsafe: no page"); 330 uvm_pagefree(pg); 331 } 332 } 333 334 /* 335 * uvm_km_kmemalloc: lower level kernel memory allocator for malloc() 336 * 337 * => we map wired memory into the specified map using the obj passed in 338 * => NOTE: we can return NULL even if we can wait if there is not enough 339 * free VM space in the map... 
caller should be prepared to handle 340 * this case. 341 * => we return KVA of memory allocated 342 * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't 343 * lock the map 344 * => low, high, alignment, boundary, nsegs are the corresponding parameters 345 * to uvm_pglistalloc 346 * => flags: ZERO - correspond to uvm_pglistalloc flags 347 */ 348 349 vaddr_t 350 uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size, 351 vsize_t valign, int flags, paddr_t low, paddr_t high, paddr_t alignment, 352 paddr_t boundary, int nsegs) 353 { 354 vaddr_t kva, loopva; 355 voff_t offset; 356 struct vm_page *pg; 357 struct pglist pgl; 358 int pla_flags; 359 360 KASSERT(vm_map_pmap(map) == pmap_kernel()); 361 /* UVM_KMF_VALLOC => !UVM_KMF_ZERO */ 362 KASSERT(!(flags & UVM_KMF_VALLOC) || 363 !(flags & UVM_KMF_ZERO)); 364 365 /* 366 * setup for call 367 */ 368 369 size = round_page(size); 370 kva = vm_map_min(map); /* hint */ 371 if (nsegs == 0) 372 nsegs = atop(size); 373 374 /* 375 * allocate some virtual space 376 */ 377 378 if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET, 379 valign, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE, 380 UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) { 381 return(0); 382 } 383 384 /* 385 * if all we wanted was VA, return now 386 */ 387 388 if (flags & UVM_KMF_VALLOC) { 389 return(kva); 390 } 391 392 /* 393 * recover object offset from virtual address 394 */ 395 396 if (obj != NULL) 397 offset = kva - vm_map_min(kernel_map); 398 else 399 offset = 0; 400 401 /* 402 * now allocate and map in the memory... note that we are the only ones 403 * whom should ever get a handle on this area of VM. 
404 */ 405 TAILQ_INIT(&pgl); 406 pla_flags = 0; 407 if ((flags & UVM_KMF_NOWAIT) || 408 ((flags & UVM_KMF_CANFAIL) && 409 uvmexp.swpgonly - uvmexp.swpages <= atop(size))) 410 pla_flags |= UVM_PLA_NOWAIT; 411 else 412 pla_flags |= UVM_PLA_WAITOK; 413 if (flags & UVM_KMF_ZERO) 414 pla_flags |= UVM_PLA_ZERO; 415 if (uvm_pglistalloc(size, low, high, alignment, boundary, &pgl, nsegs, 416 pla_flags) != 0) { 417 /* Failed. */ 418 uvm_unmap(map, kva, kva + size); 419 return (0); 420 } 421 422 loopva = kva; 423 while (loopva != kva + size) { 424 pg = TAILQ_FIRST(&pgl); 425 TAILQ_REMOVE(&pgl, pg, pageq); 426 uvm_pagealloc_pg(pg, obj, offset, NULL); 427 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 428 UVM_PAGE_OWN(pg, NULL); 429 430 /* 431 * map it in: note that we call pmap_enter with the map and 432 * object unlocked in case we are kmem_map. 433 */ 434 435 if (obj == NULL) { 436 pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), 437 UVM_PROT_RW); 438 } else { 439 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), 440 UVM_PROT_RW, 441 PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE); 442 } 443 loopva += PAGE_SIZE; 444 offset += PAGE_SIZE; 445 } 446 KASSERT(TAILQ_EMPTY(&pgl)); 447 pmap_update(pmap_kernel()); 448 449 return(kva); 450 } 451 452 /* 453 * uvm_km_free: free an area of kernel memory 454 */ 455 456 void 457 uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size) 458 { 459 uvm_unmap(map, trunc_page(addr), round_page(addr+size)); 460 } 461 462 /* 463 * uvm_km_free_wakeup: free an area of kernel memory and wake up 464 * anyone waiting for vm space. 465 * 466 * => XXX: "wanted" bit + unlock&wait on other end? 
467 */ 468 469 void 470 uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size) 471 { 472 struct uvm_map_deadq dead_entries; 473 474 vm_map_lock(map); 475 TAILQ_INIT(&dead_entries); 476 uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), 477 &dead_entries, FALSE, TRUE); 478 wakeup(map); 479 vm_map_unlock(map); 480 481 uvm_unmap_detach(&dead_entries, 0); 482 } 483 484 /* 485 * uvm_km_alloc1: allocate wired down memory in the kernel map. 486 * 487 * => we can sleep if needed 488 */ 489 490 vaddr_t 491 uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit) 492 { 493 vaddr_t kva, loopva; 494 voff_t offset; 495 struct vm_page *pg; 496 497 KASSERT(vm_map_pmap(map) == pmap_kernel()); 498 499 size = round_page(size); 500 kva = vm_map_min(map); /* hint */ 501 502 /* 503 * allocate some virtual space 504 */ 505 506 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, 507 UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 508 UVM_INH_NONE, UVM_ADV_RANDOM, 0)) != 0)) { 509 return(0); 510 } 511 512 /* 513 * recover object offset from virtual address 514 */ 515 516 offset = kva - vm_map_min(kernel_map); 517 518 /* 519 * now allocate the memory. we must be careful about released pages. 520 */ 521 522 loopva = kva; 523 while (size) { 524 simple_lock(&uvm.kernel_object->vmobjlock); 525 /* allocate ram */ 526 pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); 527 if (pg) { 528 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 529 UVM_PAGE_OWN(pg, NULL); 530 } 531 simple_unlock(&uvm.kernel_object->vmobjlock); 532 if (__predict_false(pg == NULL)) { 533 if (curproc == uvm.pagedaemon_proc) { 534 /* 535 * It is unfeasible for the page daemon to 536 * sleep for memory, so free what we have 537 * allocated and fail. 
538 */ 539 uvm_unmap(map, kva, loopva - kva); 540 return (0); 541 } else { 542 uvm_wait("km_alloc1w"); /* wait for memory */ 543 continue; 544 } 545 } 546 547 /* 548 * map it in; note we're never called with an intrsafe 549 * object, so we always use regular old pmap_enter(). 550 */ 551 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), 552 UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE); 553 554 loopva += PAGE_SIZE; 555 offset += PAGE_SIZE; 556 size -= PAGE_SIZE; 557 } 558 pmap_update(map->pmap); 559 560 /* 561 * zero on request (note that "size" is now zero due to the above loop 562 * so we need to subtract kva from loopva to reconstruct the size). 563 */ 564 565 if (zeroit) 566 memset((caddr_t)kva, 0, loopva - kva); 567 568 return(kva); 569 } 570 571 /* 572 * uvm_km_valloc: allocate zero-fill memory in the kernel's address space 573 * 574 * => memory is not allocated until fault time 575 */ 576 577 vaddr_t 578 uvm_km_valloc(struct vm_map *map, vsize_t size) 579 { 580 return(uvm_km_valloc_align(map, size, 0, 0)); 581 } 582 583 vaddr_t 584 uvm_km_valloc_try(struct vm_map *map, vsize_t size) 585 { 586 return(uvm_km_valloc_align(map, size, 0, UVM_FLAG_TRYLOCK)); 587 } 588 589 vaddr_t 590 uvm_km_valloc_align(struct vm_map *map, vsize_t size, vsize_t align, int flags) 591 { 592 vaddr_t kva; 593 594 KASSERT(vm_map_pmap(map) == pmap_kernel()); 595 596 size = round_page(size); 597 kva = vm_map_min(map); /* hint */ 598 599 /* 600 * allocate some virtual space. will be demand filled by kernel_object. 
601 */ 602 603 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, 604 UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 605 UVM_INH_NONE, UVM_ADV_RANDOM, flags)) != 0)) { 606 return(0); 607 } 608 609 return(kva); 610 } 611 612 /* 613 * uvm_km_valloc_wait: allocate zero-fill memory in the kernel's address space 614 * 615 * => memory is not allocated until fault time 616 * => if no room in map, wait for space to free, unless requested size 617 * is larger than map (in which case we return 0) 618 */ 619 620 vaddr_t 621 uvm_km_valloc_prefer_wait(struct vm_map *map, vsize_t size, voff_t prefer) 622 { 623 vaddr_t kva; 624 625 KASSERT(vm_map_pmap(map) == pmap_kernel()); 626 627 size = round_page(size); 628 if (size > vm_map_max(map) - vm_map_min(map)) 629 return(0); 630 631 while (1) { 632 kva = vm_map_min(map); /* hint */ 633 634 /* 635 * allocate some virtual space. will be demand filled 636 * by kernel_object. 637 */ 638 639 if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object, 640 prefer, 0, UVM_MAPFLAG(UVM_PROT_ALL, 641 UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) == 0)) { 642 return(kva); 643 } 644 645 /* 646 * failed. sleep for a while (on map) 647 */ 648 649 tsleep((caddr_t)map, PVM, "vallocwait", 0); 650 } 651 /*NOTREACHED*/ 652 } 653 654 vaddr_t 655 uvm_km_valloc_wait(struct vm_map *map, vsize_t size) 656 { 657 return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET); 658 } 659 660 #if defined(__HAVE_PMAP_DIRECT) 661 /* 662 * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch 663 * On architectures with machine memory direct mapped into a portion 664 * of KVM, we have very little work to do. Just get a physical page, 665 * and find and return its VA. 666 */ 667 void 668 uvm_km_page_init(void) 669 { 670 /* nothing */ 671 } 672 673 #else 674 /* 675 * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs 676 * This is a special allocator that uses a reserve of free pages 677 * to fulfill requests. 
It is fast and interrupt safe, but can only 678 * return page sized regions. Its primary use is as a backend for pool. 679 * 680 * The memory returned is allocated from the larger kernel_map, sparing 681 * pressure on the small interrupt-safe kmem_map. It is wired, but 682 * not zero filled. 683 */ 684 685 struct uvm_km_pages uvm_km_pages; 686 687 void uvm_km_createthread(void *); 688 void uvm_km_thread(void *); 689 struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *); 690 691 /* 692 * Allocate the initial reserve, and create the thread which will 693 * keep the reserve full. For bootstrapping, we allocate more than 694 * the lowat amount, because it may be a while before the thread is 695 * running. 696 */ 697 void 698 uvm_km_page_init(void) 699 { 700 int lowat_min; 701 int i; 702 int len, bulk; 703 vaddr_t addr; 704 705 mtx_init(&uvm_km_pages.mtx, IPL_VM); 706 if (!uvm_km_pages.lowat) { 707 /* based on physmem, calculate a good value here */ 708 uvm_km_pages.lowat = physmem / 256; 709 lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128; 710 if (uvm_km_pages.lowat < lowat_min) 711 uvm_km_pages.lowat = lowat_min; 712 } 713 if (uvm_km_pages.lowat > UVM_KM_PAGES_LOWAT_MAX) 714 uvm_km_pages.lowat = UVM_KM_PAGES_LOWAT_MAX; 715 uvm_km_pages.hiwat = 4 * uvm_km_pages.lowat; 716 if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX) 717 uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX; 718 719 /* Allocate all pages in as few allocations as possible. 
*/ 720 len = 0; 721 bulk = uvm_km_pages.hiwat; 722 while (len < uvm_km_pages.hiwat && bulk > 0) { 723 bulk = MIN(bulk, uvm_km_pages.hiwat - len); 724 addr = vm_map_min(kernel_map); 725 if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT, 726 NULL, UVM_UNKNOWN_OFFSET, 0, 727 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE, 728 UVM_ADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) { 729 bulk /= 2; 730 continue; 731 } 732 733 for (i = len; i < len + bulk; i++, addr += PAGE_SIZE) 734 uvm_km_pages.page[i] = addr; 735 len += bulk; 736 } 737 738 uvm_km_pages.free = len; 739 for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++) 740 uvm_km_pages.page[i] = 0; 741 742 /* tone down if really high */ 743 if (uvm_km_pages.lowat > 512) 744 uvm_km_pages.lowat = 512; 745 746 kthread_create_deferred(uvm_km_createthread, NULL); 747 } 748 749 void 750 uvm_km_createthread(void *arg) 751 { 752 kthread_create(uvm_km_thread, NULL, &uvm_km_pages.km_proc, "kmthread"); 753 } 754 755 /* 756 * Endless loop. We grab pages in increments of 16 pages, then 757 * quickly swap them into the list. At some point we can consider 758 * returning memory to the system if we have too many free pages, 759 * but that's not implemented yet. 
760 */ 761 void 762 uvm_km_thread(void *arg) 763 { 764 vaddr_t pg[16]; 765 int i; 766 int allocmore = 0; 767 struct uvm_km_free_page *fp = NULL; 768 769 for (;;) { 770 mtx_enter(&uvm_km_pages.mtx); 771 if (uvm_km_pages.free >= uvm_km_pages.lowat && 772 uvm_km_pages.freelist == NULL) { 773 msleep(&uvm_km_pages.km_proc, &uvm_km_pages.mtx, 774 PVM, "kmalloc", 0); 775 } 776 allocmore = uvm_km_pages.free < uvm_km_pages.lowat; 777 fp = uvm_km_pages.freelist; 778 uvm_km_pages.freelist = NULL; 779 uvm_km_pages.freelistlen = 0; 780 mtx_leave(&uvm_km_pages.mtx); 781 782 if (allocmore) { 783 bzero(pg, sizeof(pg)); 784 for (i = 0; i < nitems(pg); i++) { 785 pg[i] = vm_map_min(kernel_map); 786 if (uvm_map(kernel_map, &pg[i], PAGE_SIZE, 787 NULL, UVM_UNKNOWN_OFFSET, 0, 788 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 789 UVM_INH_NONE, UVM_ADV_RANDOM, 790 UVM_KMF_TRYLOCK)) != 0) { 791 pg[i] = 0; 792 break; 793 } 794 } 795 796 mtx_enter(&uvm_km_pages.mtx); 797 for (i = 0; i < nitems(pg); i++) { 798 if (uvm_km_pages.free == 799 nitems(uvm_km_pages.page)) 800 break; 801 else if (pg[i] != 0) 802 uvm_km_pages.page[uvm_km_pages.free++] 803 = pg[i]; 804 } 805 wakeup(&uvm_km_pages.free); 806 mtx_leave(&uvm_km_pages.mtx); 807 808 /* Cleanup left-over pages (if any). 
*/ 809 for (; i < nitems(pg); i++) { 810 if (pg[i] != 0) { 811 uvm_unmap(kernel_map, 812 pg[i], pg[i] + PAGE_SIZE); 813 } 814 } 815 } 816 while (fp) { 817 fp = uvm_km_doputpage(fp); 818 } 819 } 820 } 821 822 struct uvm_km_free_page * 823 uvm_km_doputpage(struct uvm_km_free_page *fp) 824 { 825 vaddr_t va = (vaddr_t)fp; 826 struct vm_page *pg; 827 int freeva = 1; 828 struct uvm_km_free_page *nextfp = fp->next; 829 830 pg = uvm_atopg(va); 831 832 pmap_kremove(va, PAGE_SIZE); 833 pmap_update(kernel_map->pmap); 834 835 mtx_enter(&uvm_km_pages.mtx); 836 if (uvm_km_pages.free < uvm_km_pages.hiwat) { 837 uvm_km_pages.page[uvm_km_pages.free++] = va; 838 freeva = 0; 839 } 840 mtx_leave(&uvm_km_pages.mtx); 841 842 if (freeva) 843 uvm_unmap(kernel_map, va, va + PAGE_SIZE); 844 845 uvm_pagefree(pg); 846 return (nextfp); 847 } 848 #endif /* !__HAVE_PMAP_DIRECT */ 849 850 void * 851 km_alloc(size_t sz, const struct kmem_va_mode *kv, 852 const struct kmem_pa_mode *kp, const struct kmem_dyn_mode *kd) 853 { 854 struct vm_map *map; 855 struct vm_page *pg; 856 struct pglist pgl; 857 int mapflags = 0; 858 vm_prot_t prot; 859 int pla_flags; 860 int pla_maxseg; 861 #ifdef __HAVE_PMAP_DIRECT 862 paddr_t pa; 863 #endif 864 vaddr_t va, sva; 865 866 KASSERT(sz == round_page(sz)); 867 868 TAILQ_INIT(&pgl); 869 870 if (kp->kp_nomem || kp->kp_pageable) 871 goto alloc_va; 872 873 pla_flags = kd->kd_waitok ? 
UVM_PLA_WAITOK : UVM_PLA_NOWAIT; 874 pla_flags |= UVM_PLA_TRYCONTIG; 875 if (kp->kp_zero) 876 pla_flags |= UVM_PLA_ZERO; 877 878 pla_maxseg = kp->kp_maxseg; 879 if (pla_maxseg == 0) 880 pla_maxseg = sz / PAGE_SIZE; 881 882 if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low, 883 kp->kp_constraint->ucr_high, kp->kp_align, kp->kp_boundary, 884 &pgl, pla_maxseg, pla_flags)) { 885 return (NULL); 886 } 887 888 #ifdef __HAVE_PMAP_DIRECT 889 if (kv->kv_align || kv->kv_executable) 890 goto alloc_va; 891 #if 1 892 /* 893 * For now, only do DIRECT mappings for single page 894 * allocations, until we figure out a good way to deal 895 * with contig allocations in km_free. 896 */ 897 if (!kv->kv_singlepage) 898 goto alloc_va; 899 #endif 900 /* 901 * Dubious optimization. If we got a contig segment, just map it 902 * through the direct map. 903 */ 904 TAILQ_FOREACH(pg, &pgl, pageq) { 905 if (pg != TAILQ_FIRST(&pgl) && 906 VM_PAGE_TO_PHYS(pg) != pa + PAGE_SIZE) 907 break; 908 pa = VM_PAGE_TO_PHYS(pg); 909 } 910 if (pg == NULL) { 911 TAILQ_FOREACH(pg, &pgl, pageq) { 912 vaddr_t v; 913 v = pmap_map_direct(pg); 914 if (pg == TAILQ_FIRST(&pgl)) 915 va = v; 916 } 917 return ((void *)va); 918 } 919 #endif 920 alloc_va: 921 if (kv->kv_executable) { 922 prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 923 } else { 924 prot = VM_PROT_READ | VM_PROT_WRITE; 925 } 926 927 if (kp->kp_pageable) { 928 KASSERT(kp->kp_object); 929 KASSERT(!kv->kv_singlepage); 930 } else { 931 KASSERT(kp->kp_object == NULL); 932 } 933 934 if (kv->kv_singlepage) { 935 KASSERT(sz == PAGE_SIZE); 936 #ifdef __HAVE_PMAP_DIRECT 937 panic("km_alloc: DIRECT single page"); 938 #else 939 mtx_enter(&uvm_km_pages.mtx); 940 while (uvm_km_pages.free == 0) { 941 if (kd->kd_waitok == 0) { 942 mtx_leave(&uvm_km_pages.mtx); 943 uvm_pglistfree(&pgl); 944 return NULL; 945 } 946 msleep(&uvm_km_pages.free, &uvm_km_pages.mtx, PVM, 947 "getpage", 0); 948 } 949 va = uvm_km_pages.page[--uvm_km_pages.free]; 950 if (uvm_km_pages.free < 
uvm_km_pages.lowat && 951 curproc != uvm_km_pages.km_proc) { 952 if (kd->kd_slowdown) 953 *kd->kd_slowdown = 1; 954 wakeup(&uvm_km_pages.km_proc); 955 } 956 mtx_leave(&uvm_km_pages.mtx); 957 #endif 958 } else { 959 struct uvm_object *uobj = NULL; 960 961 if (kd->kd_trylock) 962 mapflags |= UVM_KMF_TRYLOCK; 963 964 if (kp->kp_object) 965 uobj = *kp->kp_object; 966 try_map: 967 map = *kv->kv_map; 968 va = vm_map_min(map); 969 if (uvm_map(map, &va, sz, uobj, kd->kd_prefer, 970 kv->kv_align, UVM_MAPFLAG(prot, prot, UVM_INH_NONE, 971 UVM_ADV_RANDOM, mapflags))) { 972 if (kv->kv_wait && kd->kd_waitok) { 973 tsleep(map, PVM, "km_allocva", 0); 974 goto try_map; 975 } 976 uvm_pglistfree(&pgl); 977 return (NULL); 978 } 979 } 980 sva = va; 981 TAILQ_FOREACH(pg, &pgl, pageq) { 982 if (kp->kp_pageable) 983 pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg), 984 prot, prot | PMAP_WIRED); 985 else 986 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), prot); 987 va += PAGE_SIZE; 988 } 989 pmap_update(pmap_kernel()); 990 return ((void *)sva); 991 } 992 993 void 994 km_free(void *v, size_t sz, const struct kmem_va_mode *kv, 995 const struct kmem_pa_mode *kp) 996 { 997 vaddr_t sva, eva, va; 998 struct vm_page *pg; 999 struct pglist pgl; 1000 1001 sva = va = (vaddr_t)v; 1002 eva = va + sz; 1003 1004 if (kp->kp_nomem) { 1005 goto free_va; 1006 } 1007 1008 if (kv->kv_singlepage) { 1009 #ifdef __HAVE_PMAP_DIRECT 1010 pg = pmap_unmap_direct(va); 1011 uvm_pagefree(pg); 1012 #else 1013 struct uvm_km_free_page *fp = v; 1014 mtx_enter(&uvm_km_pages.mtx); 1015 fp->next = uvm_km_pages.freelist; 1016 uvm_km_pages.freelist = fp; 1017 if (uvm_km_pages.freelistlen++ > 16) 1018 wakeup(&uvm_km_pages.km_proc); 1019 mtx_leave(&uvm_km_pages.mtx); 1020 #endif 1021 return; 1022 } 1023 1024 if (kp->kp_pageable) { 1025 pmap_remove(pmap_kernel(), sva, eva); 1026 pmap_update(pmap_kernel()); 1027 } else { 1028 TAILQ_INIT(&pgl); 1029 for (va = sva; va < eva; va += PAGE_SIZE) { 1030 paddr_t pa; 1031 1032 if 
(!pmap_extract(pmap_kernel(), va, &pa)) 1033 continue; 1034 1035 pg = PHYS_TO_VM_PAGE(pa); 1036 if (pg == NULL) { 1037 panic("km_free: unmanaged page 0x%lx\n", pa); 1038 } 1039 TAILQ_INSERT_TAIL(&pgl, pg, pageq); 1040 } 1041 pmap_kremove(sva, sz); 1042 pmap_update(pmap_kernel()); 1043 uvm_pglistfree(&pgl); 1044 } 1045 free_va: 1046 uvm_unmap(*kv->kv_map, sva, eva); 1047 if (kv->kv_wait) 1048 wakeup(*kv->kv_map); 1049 } 1050 1051 const struct kmem_va_mode kv_any = { 1052 .kv_map = &kernel_map, 1053 }; 1054 1055 const struct kmem_va_mode kv_intrsafe = { 1056 .kv_map = &kmem_map, 1057 }; 1058 1059 const struct kmem_va_mode kv_page = { 1060 .kv_singlepage = 1 1061 }; 1062 1063 const struct kmem_pa_mode kp_dirty = { 1064 .kp_constraint = &no_constraint 1065 }; 1066 1067 const struct kmem_pa_mode kp_dma = { 1068 .kp_constraint = &dma_constraint 1069 }; 1070 1071 const struct kmem_pa_mode kp_dma_contig = { 1072 .kp_constraint = &dma_constraint, 1073 .kp_maxseg = 1 1074 }; 1075 1076 const struct kmem_pa_mode kp_dma_zero = { 1077 .kp_constraint = &dma_constraint, 1078 .kp_zero = 1 1079 }; 1080 1081 const struct kmem_pa_mode kp_zero = { 1082 .kp_constraint = &no_constraint, 1083 .kp_zero = 1 1084 }; 1085 1086 const struct kmem_pa_mode kp_pageable = { 1087 .kp_object = &uvm.kernel_object, 1088 .kp_pageable = 1 1089 /* XXX - kp_nomem, maybe, but we'll need to fix km_free. */ 1090 }; 1091 1092 const struct kmem_pa_mode kp_none = { 1093 .kp_nomem = 1 1094 }; 1095 1096 const struct kmem_dyn_mode kd_waitok = { 1097 .kd_waitok = 1, 1098 .kd_prefer = UVM_UNKNOWN_OFFSET 1099 }; 1100 1101 const struct kmem_dyn_mode kd_nowait = { 1102 .kd_prefer = UVM_UNKNOWN_OFFSET 1103 }; 1104 1105 const struct kmem_dyn_mode kd_trylock = { 1106 .kd_trylock = 1, 1107 .kd_prefer = UVM_UNKNOWN_OFFSET 1108 }; 1109