1 /* $NetBSD: vm.c,v 1.108 2011/01/22 13:13:46 pooka Exp $ */
2
3 /*
4 * Copyright (c) 2007-2010 Antti Kantee. All Rights Reserved.
5 *
6 * Development of this software was supported by
7 * The Finnish Cultural Foundation and the Research Foundation of
8 * The Helsinki University of Technology.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Virtual memory emulation routines.
34 */
35
36 /*
37 * XXX: we abuse pg->uanon for the virtual address of the storage
38 * for each page. phys_addr would fit the job description better,
39 * except that it will create unnecessary lossage on some platforms
40 * due to not being a pointer type.
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.108 2011/01/22 13:13:46 pooka Exp $");
45
46 #include <sys/param.h>
47 #include <sys/atomic.h>
48 #include <sys/buf.h>
49 #include <sys/kernel.h>
50 #include <sys/kmem.h>
51 #include <sys/mman.h>
52 #include <sys/null.h>
53 #include <sys/vnode.h>
54
55 #include <machine/pmap.h>
56
57 #include <rump/rumpuser.h>
58
59 #include <uvm/uvm.h>
60 #include <uvm/uvm_ddb.h>
61 #include <uvm/uvm_pdpolicy.h>
62 #include <uvm/uvm_prot.h>
63 #include <uvm/uvm_readahead.h>
64
65 #include "rump_private.h"
66 #include "rump_vfs_private.h"
67
68 kmutex_t uvm_pageqlock;
69 kmutex_t uvm_swap_data_lock;
70
71 struct uvmexp uvmexp;
72 int *uvmexp_pagesize;
73 int *uvmexp_pagemask;
74 int *uvmexp_pageshift;
75 struct uvm uvm;
76
77 struct vm_map rump_vmmap;
78 static struct vm_map_kernel kmem_map_store;
79 struct vm_map *kmem_map = &kmem_map_store.vmk_map;
80
81 static struct vm_map_kernel kernel_map_store;
82 struct vm_map *kernel_map = &kernel_map_store.vmk_map;
83
84 static unsigned int pdaemon_waiters;
85 static kmutex_t pdaemonmtx;
86 static kcondvar_t pdaemoncv, oomwait;
87
88 unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
89 static unsigned long curphysmem;
90 static unsigned long dddlim; /* 90% of memory limit used */
91 #define NEED_PAGEDAEMON() \
92 (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
93
94 /*
95 * Try to free two pages worth of pages from objects.
96 * If this successfully frees a full page cache page, we'll
97 * free the released page plus PAGE_SIZE/sizeof(vm_page).
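 *
 * For example, assuming PAGE_SIZE is 4096 and sizeof(struct vm_page)
 * is, say, 128 bytes (the struct size varies by platform; 128 is only
 * an illustrative figure), PAGEDAEMON_OBJCHUNK below works out to
 * 2*4096/128 = 64 object pages per pagedaemon pass.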
98 */ 99 #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page)) 100 101 /* 102 * Keep a list of least recently used pages. Since the only way a 103 * rump kernel can "access" a page is via lookup, we put the page 104 * at the back of queue every time a lookup for it is done. If the 105 * page is in front of this global queue and we're short of memory, 106 * it's a candidate for pageout. 107 */ 108 static struct pglist vmpage_lruqueue; 109 static unsigned vmpage_onqueue; 110 111 static int 112 pg_compare_key(void *ctx, const void *n, const void *key) 113 { 114 voff_t a = ((const struct vm_page *)n)->offset; 115 voff_t b = *(const voff_t *)key; 116 117 if (a < b) 118 return -1; 119 else if (a > b) 120 return 1; 121 else 122 return 0; 123 } 124 125 static int 126 pg_compare_nodes(void *ctx, const void *n1, const void *n2) 127 { 128 129 return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset); 130 } 131 132 const rb_tree_ops_t uvm_page_tree_ops = { 133 .rbto_compare_nodes = pg_compare_nodes, 134 .rbto_compare_key = pg_compare_key, 135 .rbto_node_offset = offsetof(struct vm_page, rb_node), 136 .rbto_context = NULL 137 }; 138 139 /* 140 * vm pages 141 */ 142 143 static int 144 pgctor(void *arg, void *obj, int flags) 145 { 146 struct vm_page *pg = obj; 147 148 memset(pg, 0, sizeof(*pg)); 149 pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, 150 (flags & PR_WAITOK) == PR_WAITOK, "pgalloc"); 151 return pg->uanon == NULL; 152 } 153 154 static void 155 pgdtor(void *arg, void *obj) 156 { 157 struct vm_page *pg = obj; 158 159 rump_hyperfree(pg->uanon, PAGE_SIZE); 160 } 161 162 static struct pool_cache pagecache; 163 164 /* 165 * Called with the object locked. We don't support anons. 166 */ 167 struct vm_page * 168 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon, 169 int flags, int strat, int free_list) 170 { 171 struct vm_page *pg; 172 173 KASSERT(uobj && mutex_owned(&uobj->vmobjlock)); 174 KASSERT(anon == NULL); 175 176 pg = pool_cache_get(&pagecache, PR_NOWAIT); 177 if (__predict_false(pg == NULL)) { 178 return NULL; 179 } 180 181 pg->offset = off; 182 pg->uobject = uobj; 183 184 pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE; 185 if (flags & UVM_PGA_ZERO) { 186 uvm_pagezero(pg); 187 } 188 189 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue); 190 (void)rb_tree_insert_node(&uobj->rb_tree, pg); 191 192 /* 193 * Don't put anons on the LRU page queue. We can't flush them 194 * (there's no concept of swap in a rump kernel), so no reason 195 * to bother with them. 196 */ 197 if (!UVM_OBJ_IS_AOBJ(uobj)) { 198 atomic_inc_uint(&vmpage_onqueue); 199 mutex_enter(&uvm_pageqlock); 200 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue); 201 mutex_exit(&uvm_pageqlock); 202 } 203 204 uobj->uo_npages++; 205 206 return pg; 207 } 208 209 /* 210 * Release a page. 211 * 212 * Called with the vm object locked. 
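 * The caller must also hold uvm_pageqlock; both locks are asserted
 * at the top of the function below.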
213 */
214 void
215 uvm_pagefree(struct vm_page *pg)
216 {
217 struct uvm_object *uobj = pg->uobject;
218
219 KASSERT(mutex_owned(&uvm_pageqlock));
220 KASSERT(mutex_owned(&uobj->vmobjlock));
221
222 if (pg->flags & PG_WANTED)
223 wakeup(pg);
224
225 TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
226
227 uobj->uo_npages--;
228 rb_tree_remove_node(&uobj->rb_tree, pg);
229
230 if (!UVM_OBJ_IS_AOBJ(uobj)) {
231 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
232 atomic_dec_uint(&vmpage_onqueue);
233 }
234
235 pool_cache_put(&pagecache, pg);
236 }
237
238 void
239 uvm_pagezero(struct vm_page *pg)
240 {
241
242 pg->flags &= ~PG_CLEAN;
243 memset((void *)pg->uanon, 0, PAGE_SIZE);
244 }
245
246 /*
247 * Misc routines
248 */
249
250 static kmutex_t pagermtx;
251
252 void
253 uvm_init(void)
254 {
255 char buf[64];
256 int error;
257
258 if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) {
259 unsigned long tmp;
260 char *ep;
261 int mult;
262
263 tmp = strtoll(buf, &ep, 10);
264 if (strlen(ep) > 1)
265 panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
266
267 /* mini-dehumanize-number */
268 mult = 1;
269 switch (*ep) {
270 case 'k':
271 mult = 1024;
272 break;
273 case 'm':
274 mult = 1024*1024;
275 break;
276 case 'g':
277 mult = 1024*1024*1024;
278 break;
279 case 0:
280 break;
281 default:
282 panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
283 }
284 rump_physmemlimit = tmp * mult;
285
286 if (rump_physmemlimit / mult != tmp)
287 panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);
288 /* it's not like we'd get far with, say, 1 byte, but ... */
289 if (rump_physmemlimit == 0)
290 panic("uvm_init: no memory");
291
292 #define HUMANIZE_BYTES 9
293 CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
294 format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
295 #undef HUMANIZE_BYTES
296 dddlim = 9 * (rump_physmemlimit / 10);
297 } else {
298 strlcpy(buf, "unlimited (host limit)", sizeof(buf));
299 }
300 aprint_verbose("total memory = %s\n", buf);
301
302 TAILQ_INIT(&vmpage_lruqueue);
303
304 uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */
305
306 mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
307 mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
308 mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0);
309
310 mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0);
311 cv_init(&pdaemoncv, "pdaemon");
312 cv_init(&oomwait, "oomwait");
313
314 kernel_map->pmap = pmap_kernel();
315 callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
316 kmem_map->pmap = pmap_kernel();
317 callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
318
319 pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
320 "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
321 }
322
323 void
324 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax)
325 {
326
327 vm->vm_map.pmap = pmap_kernel();
328 vm->vm_refcnt = 1;
329 }
330
331 void
332 uvm_pagewire(struct vm_page *pg)
333 {
334
335 /* nada */
336 }
337
338 void
339 uvm_pageunwire(struct vm_page *pg)
340 {
341
342 /* nada */
343 }
344
345 /*
346 * The uvm reclaim hook is not currently necessary because it is
347 * used only by ZFS and implements exactly the same functionality
348 * as the kva reclaim hook which we already run in the pagedaemon
349 * (rump vm does not have a concept of uvm_map(), so we cannot
350 * reclaim kva when a mapping operation fails due to insufficient
351 * available kva).
352 */ 353 void 354 uvm_reclaim_hook_add(struct uvm_reclaim_hook *hook_entry) 355 { 356 357 } 358 __strong_alias(uvm_reclaim_hook_del,uvm_reclaim_hook_add); 359 360 /* where's your schmonz now? */ 361 #define PUNLIMIT(a) \ 362 p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY; 363 void 364 uvm_init_limits(struct proc *p) 365 { 366 367 PUNLIMIT(RLIMIT_STACK); 368 PUNLIMIT(RLIMIT_DATA); 369 PUNLIMIT(RLIMIT_RSS); 370 PUNLIMIT(RLIMIT_AS); 371 /* nice, cascade */ 372 } 373 #undef PUNLIMIT 374 375 /* 376 * This satisfies the "disgusting mmap hack" used by proplib. 377 * We probably should grow some more assertables to make sure we're 378 * not satisfying anything we shouldn't be satisfying. 379 */ 380 int 381 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot, 382 vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim) 383 { 384 void *uaddr; 385 int error; 386 387 if (prot != (VM_PROT_READ | VM_PROT_WRITE)) 388 panic("uvm_mmap() variant unsupported"); 389 if (flags != (MAP_PRIVATE | MAP_ANON)) 390 panic("uvm_mmap() variant unsupported"); 391 392 /* no reason in particular, but cf. uvm_default_mapaddr() */ 393 if (*addr != 0) 394 panic("uvm_mmap() variant unsupported"); 395 396 if (RUMP_LOCALPROC_P(curproc)) { 397 uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error); 398 } else { 399 error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap, 400 size, &uaddr); 401 } 402 if (uaddr == NULL) 403 return error; 404 405 *addr = (vaddr_t)uaddr; 406 return 0; 407 } 408 409 struct pagerinfo { 410 vaddr_t pgr_kva; 411 int pgr_npages; 412 struct vm_page **pgr_pgs; 413 bool pgr_read; 414 415 LIST_ENTRY(pagerinfo) pgr_entries; 416 }; 417 static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist); 418 419 /* 420 * Pager "map" in routine. Instead of mapping, we allocate memory 421 * and copy page contents there. Not optimal or even strictly 422 * correct (the caller might modify the page contents after mapping 423 * them in), but what the heck. Assumes UVMPAGER_MAPIN_WAITOK. 424 */ 425 vaddr_t 426 uvm_pagermapin(struct vm_page **pgs, int npages, int flags) 427 { 428 struct pagerinfo *pgri; 429 vaddr_t curkva; 430 int i; 431 432 /* allocate structures */ 433 pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP); 434 pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP); 435 pgri->pgr_npages = npages; 436 pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP); 437 pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0; 438 439 /* copy contents to "mapped" memory */ 440 for (i = 0, curkva = pgri->pgr_kva; 441 i < npages; 442 i++, curkva += PAGE_SIZE) { 443 /* 444 * We need to copy the previous contents of the pages to 445 * the window even if we are reading from the 446 * device, since the device might not fill the contents of 447 * the full mapped range and we will end up corrupting 448 * data when we unmap the window. 449 */ 450 memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE); 451 pgri->pgr_pgs[i] = pgs[i]; 452 } 453 454 mutex_enter(&pagermtx); 455 LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries); 456 mutex_exit(&pagermtx); 457 458 return pgri->pgr_kva; 459 } 460 461 /* 462 * map out the pager window. return contents from VA to page storage 463 * and free structures. 
464 * 465 * Note: does not currently support partial frees 466 */ 467 void 468 uvm_pagermapout(vaddr_t kva, int npages) 469 { 470 struct pagerinfo *pgri; 471 vaddr_t curkva; 472 int i; 473 474 mutex_enter(&pagermtx); 475 LIST_FOREACH(pgri, &pagerlist, pgr_entries) { 476 if (pgri->pgr_kva == kva) 477 break; 478 } 479 KASSERT(pgri); 480 if (pgri->pgr_npages != npages) 481 panic("uvm_pagermapout: partial unmapping not supported"); 482 LIST_REMOVE(pgri, pgr_entries); 483 mutex_exit(&pagermtx); 484 485 if (pgri->pgr_read) { 486 for (i = 0, curkva = pgri->pgr_kva; 487 i < pgri->pgr_npages; 488 i++, curkva += PAGE_SIZE) { 489 memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE); 490 } 491 } 492 493 kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *)); 494 kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE); 495 kmem_free(pgri, sizeof(*pgri)); 496 } 497 498 /* 499 * convert va in pager window to page structure. 500 * XXX: how expensive is this (global lock, list traversal)? 501 */ 502 struct vm_page * 503 uvm_pageratop(vaddr_t va) 504 { 505 struct pagerinfo *pgri; 506 struct vm_page *pg = NULL; 507 int i; 508 509 mutex_enter(&pagermtx); 510 LIST_FOREACH(pgri, &pagerlist, pgr_entries) { 511 if (pgri->pgr_kva <= va 512 && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE) 513 break; 514 } 515 if (pgri) { 516 i = (va - pgri->pgr_kva) >> PAGE_SHIFT; 517 pg = pgri->pgr_pgs[i]; 518 } 519 mutex_exit(&pagermtx); 520 521 return pg; 522 } 523 524 /* 525 * Called with the vm object locked. 526 * 527 * Put vnode object pages at the end of the access queue to indicate 528 * they have been recently accessed and should not be immediate 529 * candidates for pageout. Do not do this for lookups done by 530 * the pagedaemon to mimic pmap_kentered mappings which don't track 531 * access information. 
532 */
533 struct vm_page *
534 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
535 {
536 struct vm_page *pg;
537 bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;
538
539 pg = rb_tree_find_node(&uobj->rb_tree, &off);
540 if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
541 mutex_enter(&uvm_pageqlock);
542 TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
543 TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
544 mutex_exit(&uvm_pageqlock);
545 }
546
547 return pg;
548 }
549
550 void
551 uvm_page_unbusy(struct vm_page **pgs, int npgs)
552 {
553 struct vm_page *pg;
554 int i;
555
556 KASSERT(npgs > 0);
557 KASSERT(mutex_owned(&pgs[0]->uobject->vmobjlock));
558
559 for (i = 0; i < npgs; i++) {
560 pg = pgs[i];
561 if (pg == NULL)
562 continue;
563
564 KASSERT(pg->flags & PG_BUSY);
565 if (pg->flags & PG_WANTED)
566 wakeup(pg);
567 if (pg->flags & PG_RELEASED)
568 uvm_pagefree(pg);
569 else
570 pg->flags &= ~(PG_WANTED|PG_BUSY);
571 }
572 }
573
574 void
575 uvm_estimatepageable(int *active, int *inactive)
576 {
577
578 /* XXX: guessing game */
579 *active = 1024;
580 *inactive = 1024;
581 }
582
583 struct vm_map_kernel *
584 vm_map_to_kernel(struct vm_map *map)
585 {
586
587 return (struct vm_map_kernel *)map;
588 }
589
590 bool
591 vm_map_starved_p(struct vm_map *map)
592 {
593
594 if (map->flags & VM_MAP_WANTVA)
595 return true;
596
597 return false;
598 }
599
600 int
601 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
602 {
603
604 panic("%s: unimplemented", __func__);
605 }
606
607 void
608 uvm_unloan(void *v, int npages, int flags)
609 {
610
611 panic("%s: unimplemented", __func__);
612 }
613
614 int
615 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
616 struct vm_page **opp)
617 {
618
619 return EBUSY;
620 }
621
622 #ifdef DEBUGPRINT
623 void
624 uvm_object_printit(struct uvm_object *uobj, bool full,
625 void (*pr)(const char *, ...))
626 {
627
628 pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs);
629 }
630 #endif
631
632 vaddr_t
633 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
634 {
635
636 return 0;
637 }
638
639 int
640 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
641 vm_prot_t prot, bool set_max)
642 {
643
644 return EOPNOTSUPP;
645 }
646
647 /*
648 * UVM km
649 */
650
651 vaddr_t
652 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
653 {
654 void *rv, *desired = NULL;
655 int alignbit, error;
656
657 #ifdef __x86_64__
658 /*
659 * On amd64, allocate all module memory from the lowest 2GB.
660 * This is because NetBSD kernel modules are compiled
661 * with -mcmodel=kernel and reserve only 4 bytes for
662 * offsets. If we load code compiled with -mcmodel=kernel
663 * anywhere except the lowest or highest 2GB, it will not
664 * work. Since userspace does not have access to the highest
665 * 2GB, use the lowest 2GB.
666 *
667 * Note: this assumes the rump kernel resides in
668 * the lowest 2GB as well.
669 *
670 * Note2: yes, it's a quick hack, but since this is the only
671 * place where we care about the map we're allocating from,
672 * just use a simple "if" instead of coming up with a fancy
673 * generic solution.
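 * (The hint passed below, 0x80000000 - size, simply asks the
 * hypercall layer for an anonymous mapping that ends at the 2GB
 * boundary.)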
674 */ 675 extern struct vm_map *module_map; 676 if (map == module_map) { 677 desired = (void *)(0x80000000 - size); 678 } 679 #endif 680 681 alignbit = 0; 682 if (align) { 683 alignbit = ffs(align)-1; 684 } 685 686 rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC, 687 &error); 688 if (rv == NULL) { 689 if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT)) 690 return 0; 691 else 692 panic("uvm_km_alloc failed"); 693 } 694 695 if (flags & UVM_KMF_ZERO) 696 memset(rv, 0, size); 697 698 return (vaddr_t)rv; 699 } 700 701 void 702 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags) 703 { 704 705 rumpuser_unmap((void *)vaddr, size); 706 } 707 708 struct vm_map * 709 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr, 710 vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap) 711 { 712 713 return (struct vm_map *)417416; 714 } 715 716 vaddr_t 717 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok) 718 { 719 720 return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE, 721 waitok, "kmalloc"); 722 } 723 724 void 725 uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr) 726 { 727 728 rump_hyperfree((void *)addr, PAGE_SIZE); 729 } 730 731 vaddr_t 732 uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok) 733 { 734 735 return uvm_km_alloc_poolpage(map, waitok); 736 } 737 738 void 739 uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr) 740 { 741 742 uvm_km_free_poolpage(map, vaddr); 743 } 744 745 void 746 uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags) 747 { 748 749 /* we eventually maybe want some model for available memory */ 750 } 751 752 /* 753 * VM space locking routines. We don't really have to do anything, 754 * since the pages are always "wired" (both local and remote processes). 755 */ 756 int 757 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access) 758 { 759 760 return 0; 761 } 762 763 void 764 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len) 765 { 766 767 } 768 769 /* 770 * For the local case the buffer mappers don't need to do anything. 771 * For the remote case we need to reserve space and copy data in or 772 * out, depending on B_READ/B_WRITE. 773 */ 774 void 775 vmapbuf(struct buf *bp, vsize_t len) 776 { 777 778 bp->b_saveaddr = bp->b_data; 779 780 /* remote case */ 781 if (!RUMP_LOCALPROC_P(curproc)) { 782 bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf"); 783 if (BUF_ISWRITE(bp)) { 784 copyin(bp->b_saveaddr, bp->b_data, len); 785 } 786 } 787 } 788 789 void 790 vunmapbuf(struct buf *bp, vsize_t len) 791 { 792 793 /* remote case */ 794 if (!RUMP_LOCALPROC_P(bp->b_proc)) { 795 if (BUF_ISREAD(bp)) { 796 copyout_proc(bp->b_proc, 797 bp->b_data, bp->b_saveaddr, len); 798 } 799 rump_hyperfree(bp->b_data, len); 800 } 801 802 bp->b_data = bp->b_saveaddr; 803 bp->b_saveaddr = 0; 804 } 805 806 void 807 uvmspace_addref(struct vmspace *vm) 808 { 809 810 /* 811 * No dynamically allocated vmspaces exist. 812 */ 813 } 814 815 void 816 uvmspace_free(struct vmspace *vm) 817 { 818 819 /* nothing for now */ 820 } 821 822 /* 823 * page life cycle stuff. it really doesn't exist, so just stubs. 
824 */ 825 826 void 827 uvm_pageactivate(struct vm_page *pg) 828 { 829 830 /* nada */ 831 } 832 833 void 834 uvm_pagedeactivate(struct vm_page *pg) 835 { 836 837 /* nada */ 838 } 839 840 void 841 uvm_pagedequeue(struct vm_page *pg) 842 { 843 844 /* nada*/ 845 } 846 847 void 848 uvm_pageenqueue(struct vm_page *pg) 849 { 850 851 /* nada */ 852 } 853 854 void 855 uvmpdpol_anfree(struct vm_anon *an) 856 { 857 858 /* nada */ 859 } 860 861 /* 862 * Physical address accessors. 863 */ 864 865 struct vm_page * 866 uvm_phys_to_vm_page(paddr_t pa) 867 { 868 869 return NULL; 870 } 871 872 paddr_t 873 uvm_vm_page_to_phys(const struct vm_page *pg) 874 { 875 876 return 0; 877 } 878 879 /* 880 * Routines related to the Page Baroness. 881 */ 882 883 void 884 uvm_wait(const char *msg) 885 { 886 887 if (__predict_false(curlwp == uvm.pagedaemon_lwp)) 888 panic("pagedaemon out of memory"); 889 if (__predict_false(rump_threads == 0)) 890 panic("pagedaemon missing (RUMP_THREADS = 0)"); 891 892 mutex_enter(&pdaemonmtx); 893 pdaemon_waiters++; 894 cv_signal(&pdaemoncv); 895 cv_wait(&oomwait, &pdaemonmtx); 896 mutex_exit(&pdaemonmtx); 897 } 898 899 void 900 uvm_pageout_start(int npages) 901 { 902 903 /* we don't have the heuristics */ 904 } 905 906 void 907 uvm_pageout_done(int npages) 908 { 909 910 /* could wakeup waiters, but just let the pagedaemon do it */ 911 } 912 913 static bool 914 processpage(struct vm_page *pg, bool *lockrunning) 915 { 916 struct uvm_object *uobj; 917 918 uobj = pg->uobject; 919 if (mutex_tryenter(&uobj->vmobjlock)) { 920 if ((pg->flags & PG_BUSY) == 0) { 921 mutex_exit(&uvm_pageqlock); 922 uobj->pgops->pgo_put(uobj, pg->offset, 923 pg->offset + PAGE_SIZE, 924 PGO_CLEANIT|PGO_FREE); 925 KASSERT(!mutex_owned(&uobj->vmobjlock)); 926 return true; 927 } else { 928 mutex_exit(&uobj->vmobjlock); 929 } 930 } else if (*lockrunning == false && ncpu > 1) { 931 CPU_INFO_ITERATOR cii; 932 struct cpu_info *ci; 933 struct lwp *l; 934 935 l = mutex_owner(&uobj->vmobjlock); 936 for (CPU_INFO_FOREACH(cii, ci)) { 937 if (ci->ci_curlwp == l) { 938 *lockrunning = true; 939 break; 940 } 941 } 942 } 943 944 return false; 945 } 946 947 /* 948 * The Diabolical pageDaemon Director (DDD). 949 */ 950 void 951 uvm_pageout(void *arg) 952 { 953 struct vm_page *pg; 954 struct pool *pp, *pp_first; 955 uint64_t where; 956 int timo = 0; 957 int cleaned, skip, skipped; 958 bool succ = false; 959 bool lockrunning; 960 961 mutex_enter(&pdaemonmtx); 962 for (;;) { 963 if (succ) { 964 kernel_map->flags &= ~VM_MAP_WANTVA; 965 kmem_map->flags &= ~VM_MAP_WANTVA; 966 timo = 0; 967 if (pdaemon_waiters) { 968 pdaemon_waiters = 0; 969 cv_broadcast(&oomwait); 970 } 971 } 972 succ = false; 973 974 if (pdaemon_waiters == 0) { 975 cv_timedwait(&pdaemoncv, &pdaemonmtx, timo); 976 uvmexp.pdwoke++; 977 } 978 979 /* tell the world that we are hungry */ 980 kernel_map->flags |= VM_MAP_WANTVA; 981 kmem_map->flags |= VM_MAP_WANTVA; 982 983 if (pdaemon_waiters == 0 && !NEED_PAGEDAEMON()) 984 continue; 985 mutex_exit(&pdaemonmtx); 986 987 /* 988 * step one: reclaim the page cache. this should give 989 * us the biggest earnings since whole pages are released 990 * into backing memory. 991 */ 992 pool_cache_reclaim(&pagecache); 993 if (!NEED_PAGEDAEMON()) { 994 succ = true; 995 mutex_enter(&pdaemonmtx); 996 continue; 997 } 998 999 /* 1000 * Ok, so that didn't help. Next, try to hunt memory 1001 * by pushing out vnode pages. The pages might contain 1002 * useful cached data, but we need the memory. 
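 * Note that processpage() drops uvm_pageqlock before issuing the
 * pager put, so each successfully cleaned page restarts the LRU
 * scan from the head ("goto again" below); the skip counter keeps
 * us from revisiting pages we already gave up on.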
1003 */ 1004 cleaned = 0; 1005 skip = 0; 1006 lockrunning = false; 1007 again: 1008 mutex_enter(&uvm_pageqlock); 1009 while (cleaned < PAGEDAEMON_OBJCHUNK) { 1010 skipped = 0; 1011 TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) { 1012 1013 /* 1014 * skip over pages we _might_ have tried 1015 * to handle earlier. they might not be 1016 * exactly the same ones, but I'm not too 1017 * concerned. 1018 */ 1019 while (skipped++ < skip) 1020 continue; 1021 1022 if (processpage(pg, &lockrunning)) { 1023 cleaned++; 1024 goto again; 1025 } 1026 1027 skip++; 1028 } 1029 break; 1030 } 1031 mutex_exit(&uvm_pageqlock); 1032 1033 /* 1034 * Ok, someone is running with an object lock held. 1035 * We want to yield the host CPU to make sure the 1036 * thread is not parked on the host. Since sched_yield() 1037 * doesn't appear to do anything on NetBSD, nanosleep 1038 * for the smallest possible time and hope we're back in 1039 * the game soon. 1040 */ 1041 if (cleaned == 0 && lockrunning) { 1042 uint64_t sec, nsec; 1043 1044 sec = 0; 1045 nsec = 1; 1046 rumpuser_nanosleep(&sec, &nsec, NULL); 1047 1048 lockrunning = false; 1049 skip = 0; 1050 1051 /* and here we go again */ 1052 goto again; 1053 } 1054 1055 /* 1056 * And of course we need to reclaim the page cache 1057 * again to actually release memory. 1058 */ 1059 pool_cache_reclaim(&pagecache); 1060 if (!NEED_PAGEDAEMON()) { 1061 succ = true; 1062 mutex_enter(&pdaemonmtx); 1063 continue; 1064 } 1065 1066 /* 1067 * Still not there? sleeves come off right about now. 1068 * First: do reclaim on kernel/kmem map. 1069 */ 1070 callback_run_roundrobin(&kernel_map_store.vmk_reclaim_callback, 1071 NULL); 1072 callback_run_roundrobin(&kmem_map_store.vmk_reclaim_callback, 1073 NULL); 1074 1075 /* 1076 * And then drain the pools. Wipe them out ... all of them. 1077 */ 1078 1079 pool_drain_start(&pp_first, &where); 1080 pp = pp_first; 1081 for (;;) { 1082 rump_vfs_drainbufs(10 /* XXX: estimate better */); 1083 succ = pool_drain_end(pp, where); 1084 if (succ) 1085 break; 1086 pool_drain_start(&pp, &where); 1087 if (pp == pp_first) { 1088 succ = pool_drain_end(pp, where); 1089 break; 1090 } 1091 } 1092 1093 /* 1094 * Need to use PYEC on our bag of tricks. 1095 * Unfortunately, the wife just borrowed it. 1096 */ 1097 1098 if (!succ && cleaned == 0) { 1099 rumpuser_dprintf("pagedaemoness: failed to reclaim " 1100 "memory ... sleeping (deadlock?)\n"); 1101 timo = hz; 1102 } 1103 1104 mutex_enter(&pdaemonmtx); 1105 } 1106 1107 panic("you can swap out any time you like, but you can never leave"); 1108 } 1109 1110 void 1111 uvm_kick_pdaemon() 1112 { 1113 1114 /* 1115 * Wake up the diabolical pagedaemon director if we are over 1116 * 90% of the memory limit. This is a complete and utter 1117 * stetson-harrison decision which you are allowed to finetune. 1118 * Don't bother locking. If we have some unflushed caches, 1119 * other waker-uppers will deal with the issue. 
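 * For example, with RUMP_MEMLIMIT=10m the limit is 10*1024*1024 =
 * 10485760 bytes and dddlim is 9*(10485760/10) = 9437184, so the
 * check below starts signalling the pagedaemon once allocations
 * exceed roughly 9 MB.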
1120 */ 1121 if (NEED_PAGEDAEMON()) { 1122 cv_signal(&pdaemoncv); 1123 } 1124 } 1125 1126 void * 1127 rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg) 1128 { 1129 unsigned long newmem; 1130 void *rv; 1131 1132 uvm_kick_pdaemon(); /* ouch */ 1133 1134 /* first we must be within the limit */ 1135 limitagain: 1136 if (rump_physmemlimit != RUMPMEM_UNLIMITED) { 1137 newmem = atomic_add_long_nv(&curphysmem, howmuch); 1138 if (newmem > rump_physmemlimit) { 1139 newmem = atomic_add_long_nv(&curphysmem, -howmuch); 1140 if (!waitok) { 1141 return NULL; 1142 } 1143 uvm_wait(wmsg); 1144 goto limitagain; 1145 } 1146 } 1147 1148 /* second, we must get something from the backend */ 1149 again: 1150 rv = rumpuser_malloc(howmuch, alignment); 1151 if (__predict_false(rv == NULL && waitok)) { 1152 uvm_wait(wmsg); 1153 goto again; 1154 } 1155 1156 return rv; 1157 } 1158 1159 void 1160 rump_hyperfree(void *what, size_t size) 1161 { 1162 1163 if (rump_physmemlimit != RUMPMEM_UNLIMITED) { 1164 atomic_add_long(&curphysmem, -size); 1165 } 1166 rumpuser_free(what); 1167 } 1168
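#if 0
/*
 * Illustrative sketch only; kept under #if 0 so it is never built.
 * It shows the intended pairing of uvm_pagermapin()/uvm_pagermapout()
 * above: the "window" is a private copy of the page contents, so data
 * a device writes into the window is copied back to page storage only
 * if UVMPAGER_MAPIN_READ was passed at mapin time.  The function name
 * and the device-read placeholder are made up for the example.
 */
static void
pagerwindow_example(struct vm_page **pgs, int npages)
{
	vaddr_t win;

	win = uvm_pagermapin(pgs, npages,
	    UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
	/* ... read from the backing device into the window here ... */
	uvm_pagermapout(win, npages);	/* copies the window back */
}
#endif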