1 /* $OpenBSD: uvm_km.c,v 1.114 2014/07/11 16:35:40 jsg Exp $ */ 2 /* $NetBSD: uvm_km.c,v 1.42 2001/01/14 02:10:01 thorpej Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Charles D. Cranor and Washington University. 6 * Copyright (c) 1991, 1993, The Regents of the University of California. 7 * 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * The Mach Operating System project at Carnegie-Mellon University. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 38 * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp 39 * 40 * 41 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 42 * All rights reserved. 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 */ 64 65 /* 66 * uvm_km.c: handle kernel memory allocation and management 67 */ 68 69 /* 70 * overview of kernel memory management: 71 * 72 * the kernel virtual address space is mapped by "kernel_map." kernel_map 73 * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS. 74 * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map). 75 * 76 * the kernel_map has several "submaps." submaps can only appear in 77 * the kernel_map (user processes can't use them). submaps "take over" 78 * the management of a sub-range of the kernel's address space. submaps 79 * are typically allocated at boot time and are never released. kernel 80 * virtual address space that is mapped by a submap is locked by the 81 * submap's lock -- not the kernel_map's lock. 82 * 83 * thus, the useful feature of submaps is that they allow us to break 84 * up the locking and protection of the kernel address space into smaller 85 * chunks. 86 * 87 * The VM system has several standard kernel submaps: 88 * kmem_map: Contains only wired kernel memory for malloc(9). 89 * Note: All access to this map must be protected by splvm as 90 * calls to malloc(9) are allowed in interrupt handlers. 91 * exec_map: Memory to hold arguments to system calls are allocated from 92 * this map. 93 * XXX: This is primeraly used to artificially limit the number 94 * of concurrent processes doing an exec. 95 * phys_map: Buffers for vmapbuf (physio) are allocated from this map. 96 * 97 * the kernel allocates its private memory out of special uvm_objects whose 98 * reference count is set to UVM_OBJ_KERN (thus indicating that the objects 99 * are "special" and never die). all kernel objects should be thought of 100 * as large, fixed-sized, sparsely populated uvm_objects. each kernel 101 * object is equal to the size of kernel virtual address space (i.e. the 102 * value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS"). 103 * 104 * most kernel private memory lives in kernel_object. the only exception 105 * to this is for memory that belongs to submaps that must be protected 106 * by splvm(). each of these submaps manages their own pages. 107 * 108 * note that just because a kernel object spans the entire kernel virtual 109 * address space doesn't mean that it has to be mapped into the entire space. 110 * large chunks of a kernel object's space go unused either because 111 * that area of kernel VM is unmapped, or there is some other type of 112 * object mapped into that range (e.g. a vnode). for submap's kernel 113 * objects, the only part of the object that can ever be populated is the 114 * offsets that are managed by the submap. 115 * 116 * note that the "offset" in a kernel object is always the kernel virtual 117 * address minus the VM_MIN_KERNEL_ADDRESS (aka vm_map_min(kernel_map)). 118 * example: 119 * suppose VM_MIN_KERNEL_ADDRESS is 0xf8000000 and the kernel does a 120 * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the 121 * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000, 122 * then that means that the page at offset 0x235000 in kernel_object is 123 * mapped at 0xf8235000. 124 * 125 * kernel objects have one other special property: when the kernel virtual 126 * memory mapping them is unmapped, the backing memory in the object is 127 * freed right away. this is done with the uvm_km_pgremove() function. 128 * this has to be done because there is no backing store for kernel pages 129 * and no need to save them after they are no longer referenced. 130 */ 131 132 #include <sys/param.h> 133 #include <sys/systm.h> 134 #include <sys/proc.h> 135 #include <sys/kthread.h> 136 #include <uvm/uvm.h> 137 138 /* 139 * global data structures 140 */ 141 142 struct vm_map *kernel_map = NULL; 143 144 /* Unconstraint range. */ 145 struct uvm_constraint_range no_constraint = { 0x0, (paddr_t)-1 }; 146 147 /* 148 * local data structues 149 */ 150 static struct vm_map kernel_map_store; 151 152 /* 153 * uvm_km_init: init kernel maps and objects to reflect reality (i.e. 154 * KVM already allocated for text, data, bss, and static data structures). 155 * 156 * => KVM is defined by VM_MIN_KERNEL_ADDRESS/VM_MAX_KERNEL_ADDRESS. 157 * we assume that [min -> start] has already been allocated and that 158 * "end" is the end. 159 */ 160 void 161 uvm_km_init(vaddr_t start, vaddr_t end) 162 { 163 vaddr_t base = VM_MIN_KERNEL_ADDRESS; 164 165 /* next, init kernel memory objects. */ 166 167 /* kernel_object: for pageable anonymous kernel memory */ 168 uao_init(); 169 uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS - 170 VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ); 171 172 /* 173 * init the map and reserve already allocated kernel space 174 * before installing. 175 */ 176 177 uvm_map_setup(&kernel_map_store, base, end, 178 #ifdef KVA_GUARDPAGES 179 VM_MAP_PAGEABLE | VM_MAP_GUARDPAGES 180 #else 181 VM_MAP_PAGEABLE 182 #endif 183 ); 184 kernel_map_store.pmap = pmap_kernel(); 185 if (base != start && uvm_map(&kernel_map_store, &base, start - base, 186 NULL, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 187 UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != 0) 188 panic("uvm_km_init: could not reserve space for kernel"); 189 190 kernel_map = &kernel_map_store; 191 } 192 193 /* 194 * uvm_km_suballoc: allocate a submap in the kernel map. once a submap 195 * is allocated all references to that area of VM must go through it. this 196 * allows the locking of VAs in kernel_map to be broken up into regions. 197 * 198 * => if `fixed' is true, *min specifies where the region described 199 * by the submap must start 200 * => if submap is non NULL we use that as the submap, otherwise we 201 * alloc a new map 202 */ 203 struct vm_map * 204 uvm_km_suballoc(struct vm_map *map, vaddr_t *min, vaddr_t *max, vsize_t size, 205 int flags, boolean_t fixed, struct vm_map *submap) 206 { 207 int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0); 208 209 size = round_page(size); /* round up to pagesize */ 210 211 /* first allocate a blank spot in the parent map */ 212 if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, 0, 213 UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, 214 UVM_ADV_RANDOM, mapflags)) != 0) { 215 panic("uvm_km_suballoc: unable to allocate space in parent map"); 216 } 217 218 /* set VM bounds (min is filled in by uvm_map) */ 219 *max = *min + size; 220 221 /* add references to pmap and create or init the submap */ 222 pmap_reference(vm_map_pmap(map)); 223 if (submap == NULL) { 224 submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags); 225 if (submap == NULL) 226 panic("uvm_km_suballoc: unable to create submap"); 227 } else { 228 uvm_map_setup(submap, *min, *max, flags); 229 submap->pmap = vm_map_pmap(map); 230 } 231 232 /* now let uvm_map_submap plug in it... */ 233 if (uvm_map_submap(map, *min, *max, submap) != 0) 234 panic("uvm_km_suballoc: submap allocation failed"); 235 236 return(submap); 237 } 238 239 /* 240 * uvm_km_pgremove: remove pages from a kernel uvm_object. 241 * 242 * => when you unmap a part of anonymous kernel memory you want to toss 243 * the pages right away. (this gets called from uvm_unmap_...). 244 */ 245 void 246 uvm_km_pgremove(struct uvm_object *uobj, vaddr_t start, vaddr_t end) 247 { 248 struct vm_page *pp; 249 voff_t curoff; 250 int slot; 251 252 KASSERT(uobj->pgops == &aobj_pager); 253 254 for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) { 255 pp = uvm_pagelookup(uobj, curoff); 256 if (pp && pp->pg_flags & PG_BUSY) { 257 atomic_setbits_int(&pp->pg_flags, PG_WANTED); 258 UVM_WAIT(pp, 0, "km_pgrm", 0); 259 curoff -= PAGE_SIZE; /* loop back to us */ 260 continue; 261 } 262 263 /* free the swap slot, then the page */ 264 slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT); 265 266 if (pp != NULL) { 267 uvm_lock_pageq(); 268 uvm_pagefree(pp); 269 uvm_unlock_pageq(); 270 } else if (slot != 0) { 271 uvmexp.swpgonly--; 272 } 273 } 274 } 275 276 277 /* 278 * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe" 279 * objects 280 * 281 * => when you unmap a part of anonymous kernel memory you want to toss 282 * the pages right away. (this gets called from uvm_unmap_...). 283 * => none of the pages will ever be busy, and none of them will ever 284 * be on the active or inactive queues (because these objects are 285 * never allowed to "page"). 286 */ 287 void 288 uvm_km_pgremove_intrsafe(vaddr_t start, vaddr_t end) 289 { 290 struct vm_page *pg; 291 vaddr_t va; 292 paddr_t pa; 293 294 for (va = start; va < end; va += PAGE_SIZE) { 295 if (!pmap_extract(pmap_kernel(), va, &pa)) 296 continue; 297 pg = PHYS_TO_VM_PAGE(pa); 298 if (pg == NULL) 299 panic("uvm_km_pgremove_intrsafe: no page"); 300 uvm_pagefree(pg); 301 } 302 } 303 304 /* 305 * uvm_km_kmemalloc: lower level kernel memory allocator for malloc() 306 * 307 * => we map wired memory into the specified map using the obj passed in 308 * => NOTE: we can return NULL even if we can wait if there is not enough 309 * free VM space in the map... caller should be prepared to handle 310 * this case. 311 * => we return KVA of memory allocated 312 * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't 313 * lock the map 314 * => low, high, alignment, boundary, nsegs are the corresponding parameters 315 * to uvm_pglistalloc 316 * => flags: ZERO - correspond to uvm_pglistalloc flags 317 */ 318 vaddr_t 319 uvm_km_kmemalloc_pla(struct vm_map *map, struct uvm_object *obj, vsize_t size, 320 vsize_t valign, int flags, paddr_t low, paddr_t high, paddr_t alignment, 321 paddr_t boundary, int nsegs) 322 { 323 vaddr_t kva, loopva; 324 voff_t offset; 325 struct vm_page *pg; 326 struct pglist pgl; 327 int pla_flags; 328 329 KASSERT(vm_map_pmap(map) == pmap_kernel()); 330 /* UVM_KMF_VALLOC => !UVM_KMF_ZERO */ 331 KASSERT(!(flags & UVM_KMF_VALLOC) || 332 !(flags & UVM_KMF_ZERO)); 333 334 /* setup for call */ 335 size = round_page(size); 336 kva = vm_map_min(map); /* hint */ 337 if (nsegs == 0) 338 nsegs = atop(size); 339 340 /* allocate some virtual space */ 341 if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET, 342 valign, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE, 343 UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) != 0)) { 344 return(0); 345 } 346 347 /* if all we wanted was VA, return now */ 348 if (flags & UVM_KMF_VALLOC) { 349 return(kva); 350 } 351 352 /* recover object offset from virtual address */ 353 if (obj != NULL) 354 offset = kva - vm_map_min(kernel_map); 355 else 356 offset = 0; 357 358 /* 359 * now allocate and map in the memory... note that we are the only ones 360 * whom should ever get a handle on this area of VM. 361 */ 362 TAILQ_INIT(&pgl); 363 pla_flags = 0; 364 KASSERT(uvmexp.swpgonly <= uvmexp.swpages); 365 if ((flags & UVM_KMF_NOWAIT) || 366 ((flags & UVM_KMF_CANFAIL) && 367 uvmexp.swpages - uvmexp.swpgonly <= atop(size))) 368 pla_flags |= UVM_PLA_NOWAIT; 369 else 370 pla_flags |= UVM_PLA_WAITOK; 371 if (flags & UVM_KMF_ZERO) 372 pla_flags |= UVM_PLA_ZERO; 373 if (uvm_pglistalloc(size, low, high, alignment, boundary, &pgl, nsegs, 374 pla_flags) != 0) { 375 /* Failed. */ 376 uvm_unmap(map, kva, kva + size); 377 return (0); 378 } 379 380 loopva = kva; 381 while (loopva != kva + size) { 382 pg = TAILQ_FIRST(&pgl); 383 TAILQ_REMOVE(&pgl, pg, pageq); 384 uvm_pagealloc_pg(pg, obj, offset, NULL); 385 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 386 UVM_PAGE_OWN(pg, NULL); 387 388 /* 389 * map it in: note that we call pmap_enter with the map and 390 * object unlocked in case we are kmem_map. 391 */ 392 if (obj == NULL) { 393 pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), 394 UVM_PROT_RW); 395 } else { 396 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), 397 UVM_PROT_RW, 398 PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE); 399 } 400 loopva += PAGE_SIZE; 401 offset += PAGE_SIZE; 402 } 403 KASSERT(TAILQ_EMPTY(&pgl)); 404 pmap_update(pmap_kernel()); 405 406 return(kva); 407 } 408 409 /* 410 * uvm_km_free: free an area of kernel memory 411 */ 412 void 413 uvm_km_free(struct vm_map *map, vaddr_t addr, vsize_t size) 414 { 415 uvm_unmap(map, trunc_page(addr), round_page(addr+size)); 416 } 417 418 /* 419 * uvm_km_free_wakeup: free an area of kernel memory and wake up 420 * anyone waiting for vm space. 421 * 422 * => XXX: "wanted" bit + unlock&wait on other end? 423 */ 424 void 425 uvm_km_free_wakeup(struct vm_map *map, vaddr_t addr, vsize_t size) 426 { 427 struct uvm_map_deadq dead_entries; 428 429 vm_map_lock(map); 430 TAILQ_INIT(&dead_entries); 431 uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), 432 &dead_entries, FALSE, TRUE); 433 wakeup(map); 434 vm_map_unlock(map); 435 436 uvm_unmap_detach(&dead_entries, 0); 437 } 438 439 /* 440 * uvm_km_alloc1: allocate wired down memory in the kernel map. 441 * 442 * => we can sleep if needed 443 */ 444 vaddr_t 445 uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit) 446 { 447 vaddr_t kva, loopva; 448 voff_t offset; 449 struct vm_page *pg; 450 451 KASSERT(vm_map_pmap(map) == pmap_kernel()); 452 453 size = round_page(size); 454 kva = vm_map_min(map); /* hint */ 455 456 /* allocate some virtual space */ 457 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, 458 UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 459 UVM_INH_NONE, UVM_ADV_RANDOM, 0)) != 0)) { 460 return(0); 461 } 462 463 /* recover object offset from virtual address */ 464 offset = kva - vm_map_min(kernel_map); 465 466 /* now allocate the memory. we must be careful about released pages. */ 467 loopva = kva; 468 while (size) { 469 /* allocate ram */ 470 pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); 471 if (pg) { 472 atomic_clearbits_int(&pg->pg_flags, PG_BUSY); 473 UVM_PAGE_OWN(pg, NULL); 474 } 475 if (__predict_false(pg == NULL)) { 476 if (curproc == uvm.pagedaemon_proc) { 477 /* 478 * It is unfeasible for the page daemon to 479 * sleep for memory, so free what we have 480 * allocated and fail. 481 */ 482 uvm_unmap(map, kva, loopva - kva); 483 return (0); 484 } else { 485 uvm_wait("km_alloc1w"); /* wait for memory */ 486 continue; 487 } 488 } 489 490 /* 491 * map it in; note we're never called with an intrsafe 492 * object, so we always use regular old pmap_enter(). 493 */ 494 pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), 495 UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE); 496 497 loopva += PAGE_SIZE; 498 offset += PAGE_SIZE; 499 size -= PAGE_SIZE; 500 } 501 pmap_update(map->pmap); 502 503 /* 504 * zero on request (note that "size" is now zero due to the above loop 505 * so we need to subtract kva from loopva to reconstruct the size). 506 */ 507 if (zeroit) 508 memset((caddr_t)kva, 0, loopva - kva); 509 510 return(kva); 511 } 512 513 /* 514 * uvm_km_valloc: allocate zero-fill memory in the kernel's address space 515 * 516 * => memory is not allocated until fault time 517 */ 518 519 vaddr_t 520 uvm_km_valloc(struct vm_map *map, vsize_t size) 521 { 522 return(uvm_km_valloc_align(map, size, 0, 0)); 523 } 524 525 vaddr_t 526 uvm_km_valloc_try(struct vm_map *map, vsize_t size) 527 { 528 return(uvm_km_valloc_align(map, size, 0, UVM_FLAG_TRYLOCK)); 529 } 530 531 vaddr_t 532 uvm_km_valloc_align(struct vm_map *map, vsize_t size, vsize_t align, int flags) 533 { 534 vaddr_t kva; 535 536 KASSERT(vm_map_pmap(map) == pmap_kernel()); 537 538 size = round_page(size); 539 kva = vm_map_min(map); /* hint */ 540 541 /* allocate some virtual space, demand filled by kernel_object. */ 542 543 if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, 544 UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, 545 UVM_INH_NONE, UVM_ADV_RANDOM, flags)) != 0)) { 546 return(0); 547 } 548 549 return(kva); 550 } 551 552 /* 553 * uvm_km_valloc_wait: allocate zero-fill memory in the kernel's address space 554 * 555 * => memory is not allocated until fault time 556 * => if no room in map, wait for space to free, unless requested size 557 * is larger than map (in which case we return 0) 558 */ 559 vaddr_t 560 uvm_km_valloc_prefer_wait(struct vm_map *map, vsize_t size, voff_t prefer) 561 { 562 vaddr_t kva; 563 564 KASSERT(vm_map_pmap(map) == pmap_kernel()); 565 566 size = round_page(size); 567 if (size > vm_map_max(map) - vm_map_min(map)) 568 return(0); 569 570 while (1) { 571 kva = vm_map_min(map); /* hint */ 572 573 /* 574 * allocate some virtual space. will be demand filled 575 * by kernel_object. 576 */ 577 if (__predict_true(uvm_map(map, &kva, size, uvm.kernel_object, 578 prefer, 0, UVM_MAPFLAG(UVM_PROT_ALL, 579 UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) == 0)) { 580 return(kva); 581 } 582 583 /* failed. sleep for a while (on map) */ 584 tsleep(map, PVM, "vallocwait", 0); 585 } 586 /*NOTREACHED*/ 587 } 588 589 vaddr_t 590 uvm_km_valloc_wait(struct vm_map *map, vsize_t size) 591 { 592 return uvm_km_valloc_prefer_wait(map, size, UVM_UNKNOWN_OFFSET); 593 } 594 595 #if defined(__HAVE_PMAP_DIRECT) 596 /* 597 * uvm_km_page allocator, __HAVE_PMAP_DIRECT arch 598 * On architectures with machine memory direct mapped into a portion 599 * of KVM, we have very little work to do. Just get a physical page, 600 * and find and return its VA. 601 */ 602 void 603 uvm_km_page_init(void) 604 { 605 /* nothing */ 606 } 607 608 #else 609 /* 610 * uvm_km_page allocator, non __HAVE_PMAP_DIRECT archs 611 * This is a special allocator that uses a reserve of free pages 612 * to fulfill requests. It is fast and interrupt safe, but can only 613 * return page sized regions. Its primary use is as a backend for pool. 614 * 615 * The memory returned is allocated from the larger kernel_map, sparing 616 * pressure on the small interrupt-safe kmem_map. It is wired, but 617 * not zero filled. 618 */ 619 620 struct uvm_km_pages uvm_km_pages; 621 622 void uvm_km_createthread(void *); 623 void uvm_km_thread(void *); 624 struct uvm_km_free_page *uvm_km_doputpage(struct uvm_km_free_page *); 625 626 /* 627 * Allocate the initial reserve, and create the thread which will 628 * keep the reserve full. For bootstrapping, we allocate more than 629 * the lowat amount, because it may be a while before the thread is 630 * running. 631 */ 632 void 633 uvm_km_page_init(void) 634 { 635 int lowat_min; 636 int i; 637 int len, bulk; 638 vaddr_t addr; 639 640 mtx_init(&uvm_km_pages.mtx, IPL_VM); 641 if (!uvm_km_pages.lowat) { 642 /* based on physmem, calculate a good value here */ 643 uvm_km_pages.lowat = physmem / 256; 644 lowat_min = physmem < atop(16 * 1024 * 1024) ? 32 : 128; 645 if (uvm_km_pages.lowat < lowat_min) 646 uvm_km_pages.lowat = lowat_min; 647 } 648 if (uvm_km_pages.lowat > UVM_KM_PAGES_LOWAT_MAX) 649 uvm_km_pages.lowat = UVM_KM_PAGES_LOWAT_MAX; 650 uvm_km_pages.hiwat = 4 * uvm_km_pages.lowat; 651 if (uvm_km_pages.hiwat > UVM_KM_PAGES_HIWAT_MAX) 652 uvm_km_pages.hiwat = UVM_KM_PAGES_HIWAT_MAX; 653 654 /* Allocate all pages in as few allocations as possible. */ 655 len = 0; 656 bulk = uvm_km_pages.hiwat; 657 while (len < uvm_km_pages.hiwat && bulk > 0) { 658 bulk = MIN(bulk, uvm_km_pages.hiwat - len); 659 addr = vm_map_min(kernel_map); 660 if (uvm_map(kernel_map, &addr, (vsize_t)bulk << PAGE_SHIFT, 661 NULL, UVM_UNKNOWN_OFFSET, 0, 662 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE, 663 UVM_ADV_RANDOM, UVM_KMF_TRYLOCK)) != 0) { 664 bulk /= 2; 665 continue; 666 } 667 668 for (i = len; i < len + bulk; i++, addr += PAGE_SIZE) 669 uvm_km_pages.page[i] = addr; 670 len += bulk; 671 } 672 673 uvm_km_pages.free = len; 674 for (i = len; i < UVM_KM_PAGES_HIWAT_MAX; i++) 675 uvm_km_pages.page[i] = 0; 676 677 /* tone down if really high */ 678 if (uvm_km_pages.lowat > 512) 679 uvm_km_pages.lowat = 512; 680 681 kthread_create_deferred(uvm_km_createthread, NULL); 682 } 683 684 void 685 uvm_km_createthread(void *arg) 686 { 687 kthread_create(uvm_km_thread, NULL, &uvm_km_pages.km_proc, "kmthread"); 688 } 689 690 /* 691 * Endless loop. We grab pages in increments of 16 pages, then 692 * quickly swap them into the list. At some point we can consider 693 * returning memory to the system if we have too many free pages, 694 * but that's not implemented yet. 695 */ 696 void 697 uvm_km_thread(void *arg) 698 { 699 vaddr_t pg[16]; 700 int i; 701 int allocmore = 0; 702 int flags; 703 struct uvm_km_free_page *fp = NULL; 704 705 for (;;) { 706 mtx_enter(&uvm_km_pages.mtx); 707 if (uvm_km_pages.free >= uvm_km_pages.lowat && 708 uvm_km_pages.freelist == NULL) { 709 msleep(&uvm_km_pages.km_proc, &uvm_km_pages.mtx, 710 PVM, "kmalloc", 0); 711 } 712 allocmore = uvm_km_pages.free < uvm_km_pages.lowat; 713 fp = uvm_km_pages.freelist; 714 uvm_km_pages.freelist = NULL; 715 uvm_km_pages.freelistlen = 0; 716 mtx_leave(&uvm_km_pages.mtx); 717 718 if (allocmore) { 719 /* 720 * If there was nothing on the freelist, then we 721 * must obtain at least one page to make progress. 722 * So, only use UVM_KMF_TRYLOCK for the first page 723 * if fp != NULL 724 */ 725 flags = UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 726 UVM_INH_NONE, UVM_ADV_RANDOM, 727 fp != NULL ? UVM_KMF_TRYLOCK : 0); 728 bzero(pg, sizeof(pg)); 729 for (i = 0; i < nitems(pg); i++) { 730 pg[i] = vm_map_min(kernel_map); 731 if (uvm_map(kernel_map, &pg[i], PAGE_SIZE, 732 NULL, UVM_UNKNOWN_OFFSET, 0, flags) != 0) { 733 pg[i] = 0; 734 break; 735 } 736 737 /* made progress, so don't sleep for more */ 738 flags = UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 739 UVM_INH_NONE, UVM_ADV_RANDOM, 740 UVM_KMF_TRYLOCK); 741 } 742 743 mtx_enter(&uvm_km_pages.mtx); 744 for (i = 0; i < nitems(pg); i++) { 745 if (uvm_km_pages.free == 746 nitems(uvm_km_pages.page)) 747 break; 748 else if (pg[i] != 0) 749 uvm_km_pages.page[uvm_km_pages.free++] 750 = pg[i]; 751 } 752 wakeup(&uvm_km_pages.free); 753 mtx_leave(&uvm_km_pages.mtx); 754 755 /* Cleanup left-over pages (if any). */ 756 for (; i < nitems(pg); i++) { 757 if (pg[i] != 0) { 758 uvm_unmap(kernel_map, 759 pg[i], pg[i] + PAGE_SIZE); 760 } 761 } 762 } 763 while (fp) { 764 fp = uvm_km_doputpage(fp); 765 } 766 } 767 } 768 769 struct uvm_km_free_page * 770 uvm_km_doputpage(struct uvm_km_free_page *fp) 771 { 772 vaddr_t va = (vaddr_t)fp; 773 struct vm_page *pg; 774 int freeva = 1; 775 struct uvm_km_free_page *nextfp = fp->next; 776 777 pg = uvm_atopg(va); 778 779 pmap_kremove(va, PAGE_SIZE); 780 pmap_update(kernel_map->pmap); 781 782 mtx_enter(&uvm_km_pages.mtx); 783 if (uvm_km_pages.free < uvm_km_pages.hiwat) { 784 uvm_km_pages.page[uvm_km_pages.free++] = va; 785 freeva = 0; 786 } 787 mtx_leave(&uvm_km_pages.mtx); 788 789 if (freeva) 790 uvm_unmap(kernel_map, va, va + PAGE_SIZE); 791 792 uvm_pagefree(pg); 793 return (nextfp); 794 } 795 #endif /* !__HAVE_PMAP_DIRECT */ 796 797 void * 798 km_alloc(size_t sz, const struct kmem_va_mode *kv, 799 const struct kmem_pa_mode *kp, const struct kmem_dyn_mode *kd) 800 { 801 struct vm_map *map; 802 struct vm_page *pg; 803 struct pglist pgl; 804 int mapflags = 0; 805 vm_prot_t prot; 806 int pla_flags; 807 int pla_maxseg; 808 #ifdef __HAVE_PMAP_DIRECT 809 paddr_t pa; 810 #endif 811 vaddr_t va, sva; 812 813 KASSERT(sz == round_page(sz)); 814 815 TAILQ_INIT(&pgl); 816 817 if (kp->kp_nomem || kp->kp_pageable) 818 goto alloc_va; 819 820 pla_flags = kd->kd_waitok ? UVM_PLA_WAITOK : UVM_PLA_NOWAIT; 821 pla_flags |= UVM_PLA_TRYCONTIG; 822 if (kp->kp_zero) 823 pla_flags |= UVM_PLA_ZERO; 824 825 pla_maxseg = kp->kp_maxseg; 826 if (pla_maxseg == 0) 827 pla_maxseg = sz / PAGE_SIZE; 828 829 if (uvm_pglistalloc(sz, kp->kp_constraint->ucr_low, 830 kp->kp_constraint->ucr_high, kp->kp_align, kp->kp_boundary, 831 &pgl, pla_maxseg, pla_flags)) { 832 return (NULL); 833 } 834 835 #ifdef __HAVE_PMAP_DIRECT 836 if (kv->kv_align || kv->kv_executable) 837 goto alloc_va; 838 #if 1 839 /* 840 * For now, only do DIRECT mappings for single page 841 * allocations, until we figure out a good way to deal 842 * with contig allocations in km_free. 843 */ 844 if (!kv->kv_singlepage) 845 goto alloc_va; 846 #endif 847 /* 848 * Dubious optimization. If we got a contig segment, just map it 849 * through the direct map. 850 */ 851 TAILQ_FOREACH(pg, &pgl, pageq) { 852 if (pg != TAILQ_FIRST(&pgl) && 853 VM_PAGE_TO_PHYS(pg) != pa + PAGE_SIZE) 854 break; 855 pa = VM_PAGE_TO_PHYS(pg); 856 } 857 if (pg == NULL) { 858 TAILQ_FOREACH(pg, &pgl, pageq) { 859 vaddr_t v; 860 v = pmap_map_direct(pg); 861 if (pg == TAILQ_FIRST(&pgl)) 862 va = v; 863 } 864 return ((void *)va); 865 } 866 #endif 867 alloc_va: 868 if (kv->kv_executable) { 869 prot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 870 } else { 871 prot = VM_PROT_READ | VM_PROT_WRITE; 872 } 873 874 if (kp->kp_pageable) { 875 KASSERT(kp->kp_object); 876 KASSERT(!kv->kv_singlepage); 877 } else { 878 KASSERT(kp->kp_object == NULL); 879 } 880 881 if (kv->kv_singlepage) { 882 KASSERT(sz == PAGE_SIZE); 883 #ifdef __HAVE_PMAP_DIRECT 884 panic("km_alloc: DIRECT single page"); 885 #else 886 mtx_enter(&uvm_km_pages.mtx); 887 while (uvm_km_pages.free == 0) { 888 if (kd->kd_waitok == 0) { 889 mtx_leave(&uvm_km_pages.mtx); 890 uvm_pglistfree(&pgl); 891 return NULL; 892 } 893 msleep(&uvm_km_pages.free, &uvm_km_pages.mtx, PVM, 894 "getpage", 0); 895 } 896 va = uvm_km_pages.page[--uvm_km_pages.free]; 897 if (uvm_km_pages.free < uvm_km_pages.lowat && 898 curproc != uvm_km_pages.km_proc) { 899 if (kd->kd_slowdown) 900 *kd->kd_slowdown = 1; 901 wakeup(&uvm_km_pages.km_proc); 902 } 903 mtx_leave(&uvm_km_pages.mtx); 904 #endif 905 } else { 906 struct uvm_object *uobj = NULL; 907 908 if (kd->kd_trylock) 909 mapflags |= UVM_KMF_TRYLOCK; 910 911 if (kp->kp_object) 912 uobj = *kp->kp_object; 913 try_map: 914 map = *kv->kv_map; 915 va = vm_map_min(map); 916 if (uvm_map(map, &va, sz, uobj, kd->kd_prefer, 917 kv->kv_align, UVM_MAPFLAG(prot, prot, UVM_INH_NONE, 918 UVM_ADV_RANDOM, mapflags))) { 919 if (kv->kv_wait && kd->kd_waitok) { 920 tsleep(map, PVM, "km_allocva", 0); 921 goto try_map; 922 } 923 uvm_pglistfree(&pgl); 924 return (NULL); 925 } 926 } 927 sva = va; 928 TAILQ_FOREACH(pg, &pgl, pageq) { 929 if (kp->kp_pageable) 930 pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg), 931 prot, prot | PMAP_WIRED); 932 else 933 pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), prot); 934 va += PAGE_SIZE; 935 } 936 pmap_update(pmap_kernel()); 937 return ((void *)sva); 938 } 939 940 void 941 km_free(void *v, size_t sz, const struct kmem_va_mode *kv, 942 const struct kmem_pa_mode *kp) 943 { 944 vaddr_t sva, eva, va; 945 struct vm_page *pg; 946 struct pglist pgl; 947 948 sva = va = (vaddr_t)v; 949 eva = va + sz; 950 951 if (kp->kp_nomem) { 952 goto free_va; 953 } 954 955 if (kv->kv_singlepage) { 956 #ifdef __HAVE_PMAP_DIRECT 957 pg = pmap_unmap_direct(va); 958 uvm_pagefree(pg); 959 #else 960 struct uvm_km_free_page *fp = v; 961 mtx_enter(&uvm_km_pages.mtx); 962 fp->next = uvm_km_pages.freelist; 963 uvm_km_pages.freelist = fp; 964 if (uvm_km_pages.freelistlen++ > 16) 965 wakeup(&uvm_km_pages.km_proc); 966 mtx_leave(&uvm_km_pages.mtx); 967 #endif 968 return; 969 } 970 971 if (kp->kp_pageable) { 972 pmap_remove(pmap_kernel(), sva, eva); 973 pmap_update(pmap_kernel()); 974 } else { 975 TAILQ_INIT(&pgl); 976 for (va = sva; va < eva; va += PAGE_SIZE) { 977 paddr_t pa; 978 979 if (!pmap_extract(pmap_kernel(), va, &pa)) 980 continue; 981 982 pg = PHYS_TO_VM_PAGE(pa); 983 if (pg == NULL) { 984 panic("km_free: unmanaged page 0x%lx\n", pa); 985 } 986 TAILQ_INSERT_TAIL(&pgl, pg, pageq); 987 } 988 pmap_kremove(sva, sz); 989 pmap_update(pmap_kernel()); 990 uvm_pglistfree(&pgl); 991 } 992 free_va: 993 uvm_unmap(*kv->kv_map, sva, eva); 994 if (kv->kv_wait) 995 wakeup(*kv->kv_map); 996 } 997 998 const struct kmem_va_mode kv_any = { 999 .kv_map = &kernel_map, 1000 }; 1001 1002 const struct kmem_va_mode kv_intrsafe = { 1003 .kv_map = &kmem_map, 1004 }; 1005 1006 const struct kmem_va_mode kv_page = { 1007 .kv_singlepage = 1 1008 }; 1009 1010 const struct kmem_pa_mode kp_dirty = { 1011 .kp_constraint = &no_constraint 1012 }; 1013 1014 const struct kmem_pa_mode kp_dma = { 1015 .kp_constraint = &dma_constraint 1016 }; 1017 1018 const struct kmem_pa_mode kp_dma_contig = { 1019 .kp_constraint = &dma_constraint, 1020 .kp_maxseg = 1 1021 }; 1022 1023 const struct kmem_pa_mode kp_dma_zero = { 1024 .kp_constraint = &dma_constraint, 1025 .kp_zero = 1 1026 }; 1027 1028 const struct kmem_pa_mode kp_zero = { 1029 .kp_constraint = &no_constraint, 1030 .kp_zero = 1 1031 }; 1032 1033 const struct kmem_pa_mode kp_pageable = { 1034 .kp_object = &uvm.kernel_object, 1035 .kp_pageable = 1 1036 /* XXX - kp_nomem, maybe, but we'll need to fix km_free. */ 1037 }; 1038 1039 const struct kmem_pa_mode kp_none = { 1040 .kp_nomem = 1 1041 }; 1042 1043 const struct kmem_dyn_mode kd_waitok = { 1044 .kd_waitok = 1, 1045 .kd_prefer = UVM_UNKNOWN_OFFSET 1046 }; 1047 1048 const struct kmem_dyn_mode kd_nowait = { 1049 .kd_prefer = UVM_UNKNOWN_OFFSET 1050 }; 1051 1052 const struct kmem_dyn_mode kd_trylock = { 1053 .kd_trylock = 1, 1054 .kd_prefer = UVM_UNKNOWN_OFFSET 1055 }; 1056