/*	$NetBSD: vm.c,v 1.173 2017/05/14 13:49:55 nat Exp $	*/

/*
 * Copyright (c) 2007-2011 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by
 * The Finnish Cultural Foundation and the Research Foundation of
 * The Helsinki University of Technology.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */
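
/*
 * (Cross-reference, for readers new to this file: pgctor() below
 * allocates the per-page backing storage into pg->uanon and pgdtor()
 * releases it; uvm_pagezero() and the pager window routines
 * uvm_pagermapin()/uvm_pagermapout() access page contents through the
 * same pointer.)
 */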

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.173 2017/05/14 13:49:55 nat Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/null.h>
#include <sys/vnode.h>

#include <machine/pmap.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_device.h>

#include <rump-sys/kern.h>
#include <rump-sys/vfs.h>

#include <rump/rumpuser.h>

kmutex_t uvm_pageqlock;		/* non-free page lock */
kmutex_t uvm_fpageqlock;	/* free page lock, non-gpl license */
kmutex_t uvm_swap_data_lock;

struct uvmexp uvmexp;
struct uvm uvm;

#ifdef __uvmexp_pagesize
const int * const uvmexp_pagesize = &uvmexp.pagesize;
const int * const uvmexp_pagemask = &uvmexp.pagemask;
const int * const uvmexp_pageshift = &uvmexp.pageshift;
#endif

static struct vm_map kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store;

static struct vm_map module_map_store;
extern struct vm_map *module_map;

static struct pmap pmap_kernel;
struct pmap rump_pmap_local;
struct pmap *const kernel_pmap_ptr = &pmap_kernel;

vmem_t *kmem_arena;
vmem_t *kmem_va_arena;

static unsigned int pdaemon_waiters;
static kmutex_t pdaemonmtx;
static kcondvar_t pdaemoncv, oomwait;

/* all local non-proc0 processes share this vmspace */
struct vmspace *rump_vmspace_local;

unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
static unsigned long pdlimit = RUMPMEM_UNLIMITED; /* page daemon memlimit */
static unsigned long curphysmem;
static unsigned long dddlim;		/* 90% of memory limit used */
#define NEED_PAGEDAEMON() \
    (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
#define PDRESERVE (2*MAXPHYS)

/*
 * Try to free two pages worth of pages from objects.
 * If this successfully frees a full page cache page, we'll
 * free the released page plus PAGE_SIZE/sizeof(vm_page).
 */
#define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))
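
/*
 * Worked example of the limits above (illustrative only, assuming the
 * common MAXPHYS of 64KB): with RUMP_MEMLIMIT=16m, uvm_init() sets
 * pdlimit to 16MB and subtracts PDRESERVE = 128KB, leaving
 * rump_physmemlimit at roughly 16.6 million bytes; dddlim is 90% of
 * that, so NEED_PAGEDAEMON() starts returning true once hypercall
 * allocations exceed about 15 million bytes.
 */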

/*
 * Keep a list of least recently used pages.  Since the only way a
 * rump kernel can "access" a page is via lookup, we put the page
 * at the back of queue every time a lookup for it is done.  If the
 * page is in front of this global queue and we're short of memory,
 * it's a candidate for pageout.
 */
static struct pglist vmpage_lruqueue;
static unsigned vmpage_onqueue;

static int
pg_compare_key(void *ctx, const void *n, const void *key)
{
	voff_t a = ((const struct vm_page *)n)->offset;
	voff_t b = *(const voff_t *)key;

	if (a < b)
		return -1;
	else if (a > b)
		return 1;
	else
		return 0;
}

static int
pg_compare_nodes(void *ctx, const void *n1, const void *n2)
{

	return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset);
}

const rb_tree_ops_t uvm_page_tree_ops = {
	.rbto_compare_nodes = pg_compare_nodes,
	.rbto_compare_key = pg_compare_key,
	.rbto_node_offset = offsetof(struct vm_page, rb_node),
	.rbto_context = NULL
};

/*
 * vm pages
 */

static int
pgctor(void *arg, void *obj, int flags)
{
	struct vm_page *pg = obj;

	memset(pg, 0, sizeof(*pg));
	pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
	    (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
	return pg->uanon == NULL;
}

static void
pgdtor(void *arg, void *obj)
{
	struct vm_page *pg = obj;

	rump_hyperfree(pg->uanon, PAGE_SIZE);
}

static struct pool_cache pagecache;

/*
 * Called with the object locked.  We don't support anons.
 */
struct vm_page *
uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
	int flags, int strat, int free_list)
{
	struct vm_page *pg;

	KASSERT(uobj && mutex_owned(uobj->vmobjlock));
	KASSERT(anon == NULL);

	pg = pool_cache_get(&pagecache, PR_NOWAIT);
	if (__predict_false(pg == NULL)) {
		return NULL;
	}

	pg->offset = off;
	pg->uobject = uobj;

	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
	if (flags & UVM_PGA_ZERO) {
		uvm_pagezero(pg);
	}

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
	(void)rb_tree_insert_node(&uobj->rb_tree, pg);

	/*
	 * Don't put anons on the LRU page queue.  We can't flush them
	 * (there's no concept of swap in a rump kernel), so no reason
	 * to bother with them.
	 */
	if (!UVM_OBJ_IS_AOBJ(uobj)) {
		atomic_inc_uint(&vmpage_onqueue);
		mutex_enter(&uvm_pageqlock);
		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
		mutex_exit(&uvm_pageqlock);
	}

	uobj->uo_npages++;

	return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	KASSERT(mutex_owned(&uvm_pageqlock));
	KASSERT(mutex_owned(uobj->vmobjlock));

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);

	uobj->uo_npages--;
	rb_tree_remove_node(&uobj->rb_tree, pg);

	if (!UVM_OBJ_IS_AOBJ(uobj)) {
		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
		atomic_dec_uint(&vmpage_onqueue);
	}

	pool_cache_put(&pagecache, pg);
}

void
uvm_pagezero(struct vm_page *pg)
{

	pg->flags &= ~PG_CLEAN;
	memset((void *)pg->uanon, 0, PAGE_SIZE);
}
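
/*
 * Usage sketch (hypothetical caller, based on the assertions above):
 * allocation requires the object lock; freeing additionally requires
 * uvm_pageqlock.
 *
 *	mutex_enter(uobj->vmobjlock);
 *	pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
 *	...
 *	mutex_enter(&uvm_pageqlock);
 *	uvm_pagefree(pg);
 *	mutex_exit(&uvm_pageqlock);
 *	mutex_exit(uobj->vmobjlock);
 */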

/*
 * uvm_page_locked_p: return true if object associated with page is
 * locked.  this is a weak check for runtime assertions only.
 */

bool
uvm_page_locked_p(struct vm_page *pg)
{

	return mutex_owned(pg->uobject->vmobjlock);
}

/*
 * Misc routines
 */

static kmutex_t pagermtx;

void
uvm_init(void)
{
	char buf[64];

	if (rumpuser_getparam("RUMP_MEMLIMIT", buf, sizeof(buf)) == 0) {
		unsigned long tmp;
		char *ep;
		int mult;

		tmp = strtoul(buf, &ep, 10);
		if (strlen(ep) > 1)
			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);

		/* mini-dehumanize-number */
		mult = 1;
		switch (*ep) {
		case 'k':
			mult = 1024;
			break;
		case 'm':
			mult = 1024*1024;
			break;
		case 'g':
			mult = 1024*1024*1024;
			break;
		case 0:
			break;
		default:
			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
		}
		rump_physmemlimit = tmp * mult;

		if (rump_physmemlimit / mult != tmp)
			panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);

		/* reserve some memory for the pager */
		if (rump_physmemlimit <= PDRESERVE)
			panic("uvm_init: system reserves %d bytes of mem, "
			    "only %lu bytes given",
			    PDRESERVE, rump_physmemlimit);
		pdlimit = rump_physmemlimit;
		rump_physmemlimit -= PDRESERVE;

		if (pdlimit < 1024*1024)
			printf("uvm_init: WARNING: <1MB RAM limit, "
			    "hope you know what you're doing\n");

#define HUMANIZE_BYTES 9
		CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
		format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
#undef HUMANIZE_BYTES
		dddlim = 9 * (rump_physmemlimit / 10);
	} else {
		strlcpy(buf, "unlimited (host limit)", sizeof(buf));
	}
	aprint_verbose("total memory = %s\n", buf);

	TAILQ_INIT(&vmpage_lruqueue);

	if (rump_physmemlimit == RUMPMEM_UNLIMITED) {
		uvmexp.npages = physmem;
	} else {
		uvmexp.npages = pdlimit >> PAGE_SHIFT;
		uvmexp.reserve_pagedaemon = PDRESERVE >> PAGE_SHIFT;
		uvmexp.freetarg = (rump_physmemlimit-dddlim) >> PAGE_SHIFT;
	}
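
	/*
	 * Example (hypothetical invocation): starting a rump kernel
	 * with RUMP_MEMLIMIT=32m (typically taken from the host
	 * environment via rumpuser_getparam()) limits hypercall
	 * allocations to 32MB.  The parser above accepts the 'k',
	 * 'm' and 'g' suffixes; an unsuffixed value is taken as bytes.
	 */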
	/*
	 * uvmexp.free is not used internally or updated.  The reason is
	 * that the memory hypercall allocator is allowed to allocate
	 * non-page sized chunks.  We use a byte count in curphysmem
	 * instead.
	 */
	uvmexp.free = uvmexp.npages;

#ifndef __uvmexp_pagesize
	uvmexp.pagesize = PAGE_SIZE;
	uvmexp.pagemask = PAGE_MASK;
	uvmexp.pageshift = PAGE_SHIFT;
#else
#define FAKE_PAGE_SHIFT 12
	uvmexp.pageshift = FAKE_PAGE_SHIFT;
	uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT;
	uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1;
#undef FAKE_PAGE_SHIFT
#endif

	mutex_init(&pagermtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE);

	/* just to appease linkage */
	mutex_init(&uvm_fpageqlock, MUTEX_SPIN, IPL_VM);

	mutex_init(&pdaemonmtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&pdaemoncv, "pdaemon");
	cv_init(&oomwait, "oomwait");

	module_map = &module_map_store;

	kernel_map->pmap = pmap_kernel();

	pool_subsystem_init();

	kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE,
	    NULL, NULL, NULL,
	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);

	vmem_subsystem_init(kmem_arena);

	kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE,
	    vmem_alloc, vmem_free, kmem_arena,
	    8 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);

	pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
	    "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);

	/* create vmspace used by local clients */
	rump_vmspace_local = kmem_zalloc(sizeof(*rump_vmspace_local), KM_SLEEP);
	uvmspace_init(rump_vmspace_local, &rump_pmap_local, 0, 0, false);
}

void
uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax,
	bool topdown)
{

	vm->vm_map.pmap = pmap;
	vm->vm_refcnt = 1;
}

int
uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
	bool new_pageable, int lockflags)
{
	return 0;
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

/* where's your schmonz now? */
#define PUNLIMIT(a)	\
    p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
void
uvm_init_limits(struct proc *p)
{

#ifndef DFLSSIZ
#define DFLSSIZ (16*1024*1024)
#endif
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	PUNLIMIT(RLIMIT_DATA);
	PUNLIMIT(RLIMIT_RSS);
	PUNLIMIT(RLIMIT_AS);
	/* nice, cascade */
}
#undef PUNLIMIT

/*
 * This satisfies the "disgusting mmap hack" used by proplib.
 */
int
uvm_mmap_anon(struct proc *p, void **addrp, size_t size)
{
	int error;

	/* no reason in particular, but cf. uvm_default_mapaddr() */
	if (*addrp != NULL)
		panic("uvm_mmap() variant unsupported");

	if (RUMP_LOCALPROC_P(curproc)) {
		error = rumpuser_anonmmap(NULL, size, 0, 0, addrp);
	} else {
		error = rump_sysproxy_anonmmap(RUMP_SPVM2CTL(p->p_vmspace),
		    size, addrp);
	}
	return error;
}

/*
 * Stubs for things referenced from vfs_vnode.c but not used.
 */
const dev_t zerodev;

struct uvm_object *
udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size)
{
	return NULL;
}

struct pagerinfo {
	vaddr_t pgr_kva;
	int pgr_npages;
	struct vm_page **pgr_pgs;
	bool pgr_read;

	LIST_ENTRY(pagerinfo) pgr_entries;
};
static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);

/*
 * Pager "map" in routine.  Instead of mapping, we allocate memory
 * and copy page contents there.  The reason for copying instead of
 * mapping is simple: we do not assume we are running on virtual
 * memory.  Even if we could emulate virtual memory in some envs
 * such as userspace, copying is much faster than trying to awkwardly
 * cope with remapping (see "Design and Implementation" pp. 95-98).
 * The downside of the approach is that the pager requires MAXPHYS
 * free memory to perform paging, but short of virtual memory or
 * making the pager do I/O in page-sized chunks we cannot do much
 * about that.
 */
vaddr_t
uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	/* allocate structures */
	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
	pgri->pgr_npages = npages;
	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;

	/* copy contents to "mapped" memory */
	for (i = 0, curkva = pgri->pgr_kva;
	    i < npages;
	    i++, curkva += PAGE_SIZE) {
		/*
		 * We need to copy the previous contents of the pages to
		 * the window even if we are reading from the
		 * device, since the device might not fill the contents of
		 * the full mapped range and we will end up corrupting
		 * data when we unmap the window.
		 */
		memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
		pgri->pgr_pgs[i] = pgs[i];
	}

	mutex_enter(&pagermtx);
	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
	mutex_exit(&pagermtx);

	return pgri->pgr_kva;
}

/*
 * map out the pager window.  return contents from VA to page storage
 * and free structures.
 *
 * Note: does not currently support partial frees
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva == kva)
			break;
	}
	KASSERT(pgri);
	if (pgri->pgr_npages != npages)
		panic("uvm_pagermapout: partial unmapping not supported");
	LIST_REMOVE(pgri, pgr_entries);
	mutex_exit(&pagermtx);

	if (pgri->pgr_read) {
		for (i = 0, curkva = pgri->pgr_kva;
		    i < pgri->pgr_npages;
		    i++, curkva += PAGE_SIZE) {
			memcpy(pgri->pgr_pgs[i]->uanon,
			    (void *)curkva, PAGE_SIZE);
		}
	}

	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
	kmem_free((void *)pgri->pgr_kva, npages * PAGE_SIZE);
	kmem_free(pgri, sizeof(*pgri));
}
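
/*
 * Round-trip sketch (illustrative; the real callers are the genfs I/O
 * routines): a caller maps busy pages into a window, performs I/O on
 * the window, then unmaps it, at which point the window contents are
 * copied back into page storage for reads.
 *
 *	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ);
 *	... do device I/O on [kva, kva + npages*PAGE_SIZE) ...
 *	uvm_pagermapout(kva, npages);
 */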

/*
 * convert va in pager window to page structure.
 * XXX: how expensive is this (global lock, list traversal)?
 */
struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct pagerinfo *pgri;
	struct vm_page *pg = NULL;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva <= va
		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
			break;
	}
	if (pgri) {
		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
		pg = pgri->pgr_pgs[i];
	}
	mutex_exit(&pagermtx);

	return pg;
}

/*
 * Called with the vm object locked.
 *
 * Put vnode object pages at the end of the access queue to indicate
 * they have been recently accessed and should not be immediate
 * candidates for pageout.  Do not do this for lookups done by
 * the pagedaemon to mimic pmap_kentered mappings which don't track
 * access information.
 */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;
	bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;

	pg = rb_tree_find_node(&uobj->rb_tree, &off);
	if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
		mutex_enter(&uvm_pageqlock);
		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
		mutex_exit(&uvm_pageqlock);
	}

	return pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	KASSERT(npgs > 0);
	KASSERT(mutex_owned(pgs[0]->uobject->vmobjlock));

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

bool
vm_map_starved_p(struct vm_map *map)
{

	if (map->flags & VM_MAP_WANTVA)
		return true;

	return false;
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
	struct vm_page **opp)
{

	return EBUSY;
}

struct vm_page *
uvm_loanbreak(struct vm_page *pg)
{

	panic("%s: unimplemented", __func__);
}

void
ubc_purge(struct uvm_object *uobj)
{

}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
{

	return 0;
}

int
uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
	vm_prot_t prot, bool set_max)
{

	return EOPNOTSUPP;
}

int
uvm_map(struct vm_map *map, vaddr_t *startp, vsize_t size,
	struct uvm_object *uobj, voff_t uoffset, vsize_t align,
	uvm_flag_t flags)
{

	*startp = (vaddr_t)rump_hypermalloc(size, align, true, "uvm_map");
	return *startp != 0 ? 0 : ENOMEM;
}

void
uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
{

	rump_hyperfree((void*)start, end-start);
}


/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv, *desired = NULL;
	int alignbit, error;

#ifdef __x86_64__
	/*
	 * On amd64, allocate all module memory from the lowest 2GB.
	 * This is because NetBSD kernel modules are compiled
	 * with -mcmodel=kernel and reserve only 4 bytes for
	 * offsets.  If we load code compiled with -mcmodel=kernel
	 * anywhere except the lowest or highest 2GB, it will not
	 * work.  Since userspace does not have access to the highest
	 * 2GB, use the lowest 2GB.
	 *
	 * Note: this assumes the rump kernel resides in
	 * the lowest 2GB as well.
	 *
	 * Note2: yes, it's a quick hack, but since this is the only
	 * place where we care about the map we're allocating from,
	 * just use a simple "if" instead of coming up with a fancy
	 * generic solution.
	 */
	if (map == module_map) {
		desired = (void *)(0x80000000 - size);
	}
#endif

	if (__predict_false(map == module_map)) {
		alignbit = 0;
		if (align) {
			alignbit = ffs(align)-1;
		}
		error = rumpuser_anonmmap(desired, size, alignbit,
		    flags & UVM_KMF_EXEC, &rv);
	} else {
		error = rumpuser_malloc(size, align, &rv);
	}

	if (error) {
		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
			return 0;
		else
			panic("uvm_km_alloc failed");
	}

	if (flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	if (__predict_false(map == module_map))
		rumpuser_unmap((void *)vaddr, size);
	else
		rumpuser_free((void *)vaddr, size);
}

int
uvm_km_protect(struct vm_map *map, vaddr_t vaddr, vsize_t size, vm_prot_t prot)
{
	return 0;
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map *submap)
{

	return (struct vm_map *)417416;
}

int
uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
	vmem_addr_t *addr)
{
	vaddr_t va;
	va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE,
	    (flags & VM_SLEEP), "kmalloc");

	if (va) {
		*addr = va;
		return 0;
	} else {
		return ENOMEM;
	}
}

void
uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	rump_hyperfree((void *)addr, size);
}

/*
 * VM space locking routines.  We don't really have to do anything,
 * since the pages are always "wired" (both local and remote processes).
 */
int
uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
{

	return 0;
}

void
uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
{

}

/*
 * For the local case the buffer mappers don't need to do anything.
 * For the remote case we need to reserve space and copy data in or
 * out, depending on B_READ/B_WRITE.
 */
int
vmapbuf(struct buf *bp, vsize_t len)
{
	int error = 0;

	bp->b_saveaddr = bp->b_data;

	/* remote case */
	if (!RUMP_LOCALPROC_P(curproc)) {
		bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
		if (BUF_ISWRITE(bp)) {
			error = copyin(bp->b_saveaddr, bp->b_data, len);
			if (error) {
				rump_hyperfree(bp->b_data, len);
				bp->b_data = bp->b_saveaddr;
				bp->b_saveaddr = 0;
			}
		}
	}

	return error;
}

void
vunmapbuf(struct buf *bp, vsize_t len)
{

	/* remote case */
	if (!RUMP_LOCALPROC_P(bp->b_proc)) {
		if (BUF_ISREAD(bp)) {
			bp->b_error = copyout_proc(bp->b_proc,
			    bp->b_data, bp->b_saveaddr, len);
		}
		rump_hyperfree(bp->b_data, len);
	}

	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = 0;
}

void
uvmspace_addref(struct vmspace *vm)
{

	/*
	 * No dynamically allocated vmspaces exist.
	 */
}

void
uvmspace_free(struct vmspace *vm)
{

	/* nothing for now */
}

/*
 * page life cycle stuff.  it really doesn't exist, so just stubs.
 */

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedeactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedequeue(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageenqueue(struct vm_page *pg)
{

	/* nada */
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	/* nada */
}

/*
 * Physical address accessors.
 */

struct vm_page *
uvm_phys_to_vm_page(paddr_t pa)
{

	return NULL;
}

paddr_t
uvm_vm_page_to_phys(const struct vm_page *pg)
{

	return 0;
}

vaddr_t
uvm_uarea_alloc(void)
{

	/* non-zero */
	return (vaddr_t)11;
}

void
uvm_uarea_free(vaddr_t uarea)
{

	/* nata, so creamy */
}

/*
 * Routines related to the Page Baroness.
 */

void
uvm_wait(const char *msg)
{

	if (__predict_false(rump_threads == 0))
		panic("pagedaemon missing (RUMP_THREADS = 0)");

	if (curlwp == uvm.pagedaemon_lwp) {
		/* is it possible for us to later get memory? */
		if (!uvmexp.paging)
			panic("pagedaemon out of memory");
	}

	mutex_enter(&pdaemonmtx);
	pdaemon_waiters++;
	cv_signal(&pdaemoncv);
	cv_wait(&oomwait, &pdaemonmtx);
	mutex_exit(&pdaemonmtx);
}

void
uvm_pageout_start(int npages)
{

	mutex_enter(&pdaemonmtx);
	uvmexp.paging += npages;
	mutex_exit(&pdaemonmtx);
}

void
uvm_pageout_done(int npages)
{

	if (!npages)
		return;

	mutex_enter(&pdaemonmtx);
	KASSERT(uvmexp.paging >= npages);
	uvmexp.paging -= npages;

	if (pdaemon_waiters) {
		pdaemon_waiters = 0;
		cv_broadcast(&oomwait);
	}
	mutex_exit(&pdaemonmtx);
}

static bool
processpage(struct vm_page *pg, bool *lockrunning)
{
	struct uvm_object *uobj;

	uobj = pg->uobject;
	if (mutex_tryenter(uobj->vmobjlock)) {
		if ((pg->flags & PG_BUSY) == 0) {
			mutex_exit(&uvm_pageqlock);
			uobj->pgops->pgo_put(uobj, pg->offset,
			    pg->offset + PAGE_SIZE,
			    PGO_CLEANIT|PGO_FREE);
			KASSERT(!mutex_owned(uobj->vmobjlock));
			return true;
		} else {
			mutex_exit(uobj->vmobjlock);
		}
	} else if (*lockrunning == false && ncpu > 1) {
		CPU_INFO_ITERATOR cii;
		struct cpu_info *ci;
		struct lwp *l;

		l = mutex_owner(uobj->vmobjlock);
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (ci->ci_curlwp == l) {
				*lockrunning = true;
				break;
			}
		}
	}

	return false;
}

/*
 * The Diabolical pageDaemon Director (DDD).
 *
 * This routine can always use better heuristics.
 */
void
uvm_pageout(void *arg)
{
	struct vm_page *pg;
	struct pool *pp, *pp_first;
	int cleaned, skip, skipped;
	bool succ;
	bool lockrunning;

	mutex_enter(&pdaemonmtx);
	for (;;) {
		if (!NEED_PAGEDAEMON()) {
			kernel_map->flags &= ~VM_MAP_WANTVA;
		}

		if (pdaemon_waiters) {
			pdaemon_waiters = 0;
			cv_broadcast(&oomwait);
		}

		cv_wait(&pdaemoncv, &pdaemonmtx);
		uvmexp.pdwoke++;

		/* tell the world that we are hungry */
		kernel_map->flags |= VM_MAP_WANTVA;
		mutex_exit(&pdaemonmtx);

		/*
		 * step one: reclaim the page cache.  this should give
		 * us the biggest earnings since whole pages are released
		 * into backing memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			mutex_enter(&pdaemonmtx);
			continue;
		}

		/*
		 * Ok, so that didn't help.  Next, try to hunt memory
		 * by pushing out vnode pages.  The pages might contain
		 * useful cached data, but we need the memory.
		 */
		cleaned = 0;
		skip = 0;
		lockrunning = false;
 again:
		mutex_enter(&uvm_pageqlock);
		while (cleaned < PAGEDAEMON_OBJCHUNK) {
			skipped = 0;
			TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {

				/*
				 * skip over pages we _might_ have tried
				 * to handle earlier.  they might not be
				 * exactly the same ones, but I'm not too
				 * concerned.
				 */
				while (skipped++ < skip)
					continue;

				if (processpage(pg, &lockrunning)) {
					cleaned++;
					goto again;
				}

				skip++;
			}
			break;
		}
		mutex_exit(&uvm_pageqlock);

		/*
		 * Ok, someone is running with an object lock held.
		 * We want to yield the host CPU to make sure the
		 * thread is not parked on the host.
		 * Since sched_yield() doesn't appear to do anything on
		 * NetBSD, nanosleep for the smallest possible time and
		 * hope we're back in the game soon.
		 */
		if (cleaned == 0 && lockrunning) {
			rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 0, 1);

			lockrunning = false;
			skip = 0;

			/* and here we go again */
			goto again;
		}

		/*
		 * And of course we need to reclaim the page cache
		 * again to actually release memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			mutex_enter(&pdaemonmtx);
			continue;
		}

		/*
		 * And then drain the pools.  Wipe them out ... all of them.
		 */
		for (pp_first = NULL;;) {
			rump_vfs_drainbufs(10 /* XXX: estimate! */);

			succ = pool_drain(&pp);
			if (succ || pp == pp_first)
				break;

			if (pp_first == NULL)
				pp_first = pp;
		}

		/*
		 * Need to use PYEC on our bag of tricks.
		 * Unfortunately, the wife just borrowed it.
		 */

		mutex_enter(&pdaemonmtx);
		if (!succ && cleaned == 0 && pdaemon_waiters &&
		    uvmexp.paging == 0) {
			rumpuser_dprintf("pagedaemoness: failed to reclaim "
			    "memory ... sleeping (deadlock?)\n");
			kpause("pddlk", false, hz, &pdaemonmtx);
		}
	}

	panic("you can swap out any time you like, but you can never leave");
}

void
uvm_kick_pdaemon()
{

	/*
	 * Wake up the diabolical pagedaemon director if we are over
	 * 90% of the memory limit.  This is a complete and utter
	 * stetson-harrison decision which you are allowed to finetune.
	 * Don't bother locking.  If we have some unflushed caches,
	 * other waker-uppers will deal with the issue.
	 */
	if (NEED_PAGEDAEMON()) {
		cv_signal(&pdaemoncv);
	}
}

void *
rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
{
	const unsigned long thelimit =
	    curlwp == uvm.pagedaemon_lwp ? pdlimit : rump_physmemlimit;
	unsigned long newmem;
	void *rv;
	int error;

	uvm_kick_pdaemon(); /* ouch */

	/* first we must be within the limit */
 limitagain:
	if (thelimit != RUMPMEM_UNLIMITED) {
		newmem = atomic_add_long_nv(&curphysmem, howmuch);
		if (newmem > thelimit) {
			newmem = atomic_add_long_nv(&curphysmem, -howmuch);
			if (!waitok) {
				return NULL;
			}
			uvm_wait(wmsg);
			goto limitagain;
		}
	}

	/* second, we must get something from the backend */
 again:
	error = rumpuser_malloc(howmuch, alignment, &rv);
	if (__predict_false(error && waitok)) {
		uvm_wait(wmsg);
		goto again;
	}

	return rv;
}

void
rump_hyperfree(void *what, size_t size)
{

	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
		atomic_add_long(&curphysmem, -size);
	}
	rumpuser_free(what, size);
}
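
/*
 * Accounting note (worked example): the limit bookkeeping above is in
 * bytes, not pages.  A rump_hypermalloc(100, ...) call charges exactly
 * 100 bytes against curphysmem regardless of how the hypercall backend
 * rounds the allocation, which is why uvm_init() never updates
 * uvmexp.free and the pagedaemon trigger compares curphysmem against
 * dddlim instead of counting pages.
 */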