/*	$NetBSD: vm.c,v 1.159 2014/06/15 12:58:01 pooka Exp $	*/

/*
 * Copyright (c) 2007-2011 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by
 * The Finnish Cultural Foundation and the Research Foundation of
 * The Helsinki University of Technology.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.159 2014/06/15 12:58:01 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/null.h>
#include <sys/vnode.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>

#include "rump_private.h"
#include "rump_vfs_private.h"

kmutex_t uvm_pageqlock;		/* non-free page lock */
kmutex_t uvm_fpageqlock;	/* free page lock, non-gpl license */
kmutex_t uvm_swap_data_lock;

struct uvmexp uvmexp;
struct uvm uvm;

#ifdef __uvmexp_pagesize
const int * const uvmexp_pagesize = &uvmexp.pagesize;
const int * const uvmexp_pagemask = &uvmexp.pagemask;
const int * const uvmexp_pageshift = &uvmexp.pageshift;
#endif

struct vm_map rump_vmmap;

static struct vm_map kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store;

static struct vm_map module_map_store;
extern struct vm_map *module_map;

vmem_t *kmem_arena;
vmem_t *kmem_va_arena;

static unsigned int pdaemon_waiters;
static kmutex_t pdaemonmtx;
static kcondvar_t pdaemoncv, oomwait;

unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
static unsigned long pdlimit = RUMPMEM_UNLIMITED; /* page daemon memlimit */
static unsigned long curphysmem;
static unsigned long dddlim;		/* 90% of memory limit used */
#define NEED_PAGEDAEMON() \
    (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
#define PDRESERVE (2*MAXPHYS)

/*
 * Try to free two pages' worth of pages from objects.
 * If this successfully frees a full page cache page, we'll
 * free the released page plus PAGE_SIZE/sizeof(vm_page).
 */
#define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))
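
/*
 * Illustrative numbers only (not taken from the code, and assuming a
 * typical MAXPHYS of 64kB): with RUMP_MEMLIMIT=16m, uvm_init() below
 * ends up with
 *
 *	pdlimit           = 16777216 (the full 16MB, pagedaemon only)
 *	rump_physmemlimit = 16777216 - 2*65536 = 16646144
 *	dddlim            = 9 * (16646144 / 10) = 14981526
 *
 * so NEED_PAGEDAEMON() starts firing once roughly 90% of the usable
 * limit has been allocated, and the PDRESERVE slice is held back so
 * the pagedaemon itself can still allocate while it reclaims.
 */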

/*
 * Keep a list of least recently used pages.  Since the only way a
 * rump kernel can "access" a page is via lookup, we put the page
 * at the back of queue every time a lookup for it is done.  If the
 * page is in front of this global queue and we're short of memory,
 * it's a candidate for pageout.
 */
static struct pglist vmpage_lruqueue;
static unsigned vmpage_onqueue;

static int
pg_compare_key(void *ctx, const void *n, const void *key)
{
	voff_t a = ((const struct vm_page *)n)->offset;
	voff_t b = *(const voff_t *)key;

	if (a < b)
		return -1;
	else if (a > b)
		return 1;
	else
		return 0;
}

static int
pg_compare_nodes(void *ctx, const void *n1, const void *n2)
{

	return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset);
}

const rb_tree_ops_t uvm_page_tree_ops = {
	.rbto_compare_nodes = pg_compare_nodes,
	.rbto_compare_key = pg_compare_key,
	.rbto_node_offset = offsetof(struct vm_page, rb_node),
	.rbto_context = NULL
};

/*
 * vm pages
 */

static int
pgctor(void *arg, void *obj, int flags)
{
	struct vm_page *pg = obj;

	memset(pg, 0, sizeof(*pg));
	pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
	    (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
	return pg->uanon == NULL;
}

static void
pgdtor(void *arg, void *obj)
{
	struct vm_page *pg = obj;

	rump_hyperfree(pg->uanon, PAGE_SIZE);
}

static struct pool_cache pagecache;

/*
 * Called with the object locked.  We don't support anons.
 */
struct vm_page *
uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
	int flags, int strat, int free_list)
{
	struct vm_page *pg;

	KASSERT(uobj && mutex_owned(uobj->vmobjlock));
	KASSERT(anon == NULL);

	pg = pool_cache_get(&pagecache, PR_NOWAIT);
	if (__predict_false(pg == NULL)) {
		return NULL;
	}

	pg->offset = off;
	pg->uobject = uobj;

	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
	if (flags & UVM_PGA_ZERO) {
		uvm_pagezero(pg);
	}

	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
	(void)rb_tree_insert_node(&uobj->rb_tree, pg);

	/*
	 * Don't put anons on the LRU page queue.  We can't flush them
	 * (there's no concept of swap in a rump kernel), so no reason
	 * to bother with them.
	 */
	if (!UVM_OBJ_IS_AOBJ(uobj)) {
		atomic_inc_uint(&vmpage_onqueue);
		mutex_enter(&uvm_pageqlock);
		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
		mutex_exit(&uvm_pageqlock);
	}

	uobj->uo_npages++;

	return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;

	KASSERT(mutex_owned(&uvm_pageqlock));
	KASSERT(mutex_owned(uobj->vmobjlock));

	if (pg->flags & PG_WANTED)
		wakeup(pg);

	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);

	uobj->uo_npages--;
	rb_tree_remove_node(&uobj->rb_tree, pg);

	if (!UVM_OBJ_IS_AOBJ(uobj)) {
		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
		atomic_dec_uint(&vmpage_onqueue);
	}

	pool_cache_put(&pagecache, pg);
}

void
uvm_pagezero(struct vm_page *pg)
{

	pg->flags &= ~PG_CLEAN;
	memset((void *)pg->uanon, 0, PAGE_SIZE);
}
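
/*
 * A minimal usage sketch, not part of this file's interfaces: a caller
 * such as a vnode pager is assumed to hold the object lock around the
 * allocation, and to additionally take uvm_pageqlock when freeing,
 * mirroring the KASSERTs above:
 *
 *	mutex_enter(uobj->vmobjlock);
 *	pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
 *	if (pg != NULL) {
 *		... use pg->uanon as the page's backing storage ...
 *		mutex_enter(&uvm_pageqlock);
 *		uvm_pagefree(pg);
 *		mutex_exit(&uvm_pageqlock);
 *	}
 *	mutex_exit(uobj->vmobjlock);
 *
 * uvm_pagealloc() here refers to the standard UVM wrapper macro
 * around uvm_pagealloc_strat().
 */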

/*
 * uvm_page_locked_p: return true if object associated with page is
 * locked.  this is a weak check for runtime assertions only.
 */

bool
uvm_page_locked_p(struct vm_page *pg)
{

	return mutex_owned(pg->uobject->vmobjlock);
}

/*
 * Misc routines
 */

static kmutex_t pagermtx;

void
uvm_init(void)
{
	char buf[64];

	if (rumpuser_getparam("RUMP_MEMLIMIT", buf, sizeof(buf)) == 0) {
		unsigned long tmp;
		char *ep;
		int mult;

		tmp = strtoul(buf, &ep, 10);
		if (strlen(ep) > 1)
			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);

		/* mini-dehumanize-number */
		mult = 1;
		switch (*ep) {
		case 'k':
			mult = 1024;
			break;
		case 'm':
			mult = 1024*1024;
			break;
		case 'g':
			mult = 1024*1024*1024;
			break;
		case 0:
			break;
		default:
			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
		}
		rump_physmemlimit = tmp * mult;

		if (rump_physmemlimit / mult != tmp)
			panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);

		/* reserve some memory for the pager */
		if (rump_physmemlimit <= PDRESERVE)
			panic("uvm_init: system reserves %d bytes of mem, "
			    "only %lu bytes given",
			    PDRESERVE, rump_physmemlimit);
		pdlimit = rump_physmemlimit;
		rump_physmemlimit -= PDRESERVE;

		if (pdlimit < 1024*1024)
			printf("uvm_init: WARNING: <1MB RAM limit, "
			    "hope you know what you're doing\n");

#define HUMANIZE_BYTES 9
		CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
		format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
#undef HUMANIZE_BYTES
		dddlim = 9 * (rump_physmemlimit / 10);
	} else {
		strlcpy(buf, "unlimited (host limit)", sizeof(buf));
	}
	aprint_verbose("total memory = %s\n", buf);
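
	/*
	 * Example RUMP_MEMLIMIT spellings, as parsed above (illustrative,
	 * not exhaustive): a plain byte count such as "16777216", or a
	 * count with a single k/m/g suffix such as "16384k", "16m" or
	 * "1g".  More than one character after the number, or a value
	 * that does not leave room for PDRESERVE, panics at bootstrap.
	 */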

	TAILQ_INIT(&vmpage_lruqueue);

	if (rump_physmemlimit == RUMPMEM_UNLIMITED) {
		uvmexp.npages = physmem;
	} else {
		uvmexp.npages = pdlimit >> PAGE_SHIFT;
		uvmexp.reserve_pagedaemon = PDRESERVE >> PAGE_SHIFT;
		uvmexp.freetarg = (rump_physmemlimit-dddlim) >> PAGE_SHIFT;
	}
	/*
	 * uvmexp.free is not used internally or updated.  The reason is
	 * that the memory hypercall allocator is allowed to allocate
	 * non-page sized chunks.  We use a byte count in curphysmem
	 * instead.
	 */
	uvmexp.free = uvmexp.npages;

#ifndef __uvmexp_pagesize
	uvmexp.pagesize = PAGE_SIZE;
	uvmexp.pagemask = PAGE_MASK;
	uvmexp.pageshift = PAGE_SHIFT;
#else
#define FAKE_PAGE_SHIFT 12
	uvmexp.pageshift = FAKE_PAGE_SHIFT;
	uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT;
	uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1;
#undef FAKE_PAGE_SHIFT
#endif

	mutex_init(&pagermtx, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE);

	/* just to appease linkage */
	mutex_init(&uvm_fpageqlock, MUTEX_SPIN, IPL_VM);

	mutex_init(&pdaemonmtx, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&pdaemoncv, "pdaemon");
	cv_init(&oomwait, "oomwait");

	module_map = &module_map_store;

	kernel_map->pmap = pmap_kernel();

	pool_subsystem_init();

	kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE,
	    NULL, NULL, NULL,
	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);

	vmem_subsystem_init(kmem_arena);

	kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE,
	    vmem_alloc, vmem_free, kmem_arena,
	    8 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);

	pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
	    "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
}

void
uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax,
	bool topdown)
{

	vm->vm_map.pmap = pmap_kernel();
	vm->vm_refcnt = 1;
}

void
uvm_pagewire(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

	/* nada */
}

/* where's your schmonz now? */
#define PUNLIMIT(a)	\
    p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
void
uvm_init_limits(struct proc *p)
{

#ifndef DFLSSIZ
#define DFLSSIZ (16*1024*1024)
#endif
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	PUNLIMIT(RLIMIT_DATA);
	PUNLIMIT(RLIMIT_RSS);
	PUNLIMIT(RLIMIT_AS);
	/* nice, cascade */
}
#undef PUNLIMIT

/*
 * This satisfies the "disgusting mmap hack" used by proplib.
 * We probably should grow some more assertables to make sure we're
 * not satisfying anything we shouldn't be satisfying.
 */
int
uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
	vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
{
	void *uaddr;
	int error;

	if (prot != (VM_PROT_READ | VM_PROT_WRITE))
		panic("uvm_mmap() variant unsupported");
	if (flags != (MAP_PRIVATE | MAP_ANON))
		panic("uvm_mmap() variant unsupported");

	/* no reason in particular, but cf. uvm_default_mapaddr() */
	if (*addr != 0)
		panic("uvm_mmap() variant unsupported");

	if (RUMP_LOCALPROC_P(curproc)) {
		error = rumpuser_anonmmap(NULL, size, 0, 0, &uaddr);
	} else {
		error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap,
		    size, &uaddr);
	}
	if (error)
		return error;

	*addr = (vaddr_t)uaddr;
	return 0;
}

struct pagerinfo {
	vaddr_t pgr_kva;
	int pgr_npages;
	struct vm_page **pgr_pgs;
	bool pgr_read;

	LIST_ENTRY(pagerinfo) pgr_entries;
};
static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
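
/*
 * A rough sketch of how a getpages/putpages path is expected to use the
 * pager window below (illustrative only, error handling omitted):
 *
 *	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ);
 *	... device performs its I/O against the memory at kva ...
 *	uvm_pagermapout(kva, npages);
 *
 * For a read, uvm_pagermapout() is where the data actually lands in the
 * pages' storage, since "mapping in" is implemented as a copy.
 */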

/*
 * Pager "map" in routine.  Instead of mapping, we allocate memory
 * and copy page contents there.  The reason for copying instead of
 * mapping is simple: we do not assume we are running on virtual
 * memory.  Even if we could emulate virtual memory in some envs
 * such as userspace, copying is much faster than trying to awkwardly
 * cope with remapping (see "Design and Implementation" pp. 95-98).
 * The downside of the approach is that the pager requires MAXPHYS
 * free memory to perform paging, but short of virtual memory or
 * making the pager do I/O in page-sized chunks we cannot do much
 * about that.
 */
vaddr_t
uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	/* allocate structures */
	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
	pgri->pgr_npages = npages;
	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;

	/* copy contents to "mapped" memory */
	for (i = 0, curkva = pgri->pgr_kva;
	    i < npages;
	    i++, curkva += PAGE_SIZE) {
		/*
		 * We need to copy the previous contents of the pages to
		 * the window even if we are reading from the
		 * device, since the device might not fill the contents of
		 * the full mapped range and we will end up corrupting
		 * data when we unmap the window.
		 */
		memcpy((void *)curkva, pgs[i]->uanon, PAGE_SIZE);
		pgri->pgr_pgs[i] = pgs[i];
	}

	mutex_enter(&pagermtx);
	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
	mutex_exit(&pagermtx);

	return pgri->pgr_kva;
}

/*
 * map out the pager window.  return contents from VA to page storage
 * and free structures.
 *
 * Note: does not currently support partial frees
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{
	struct pagerinfo *pgri;
	vaddr_t curkva;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva == kva)
			break;
	}
	KASSERT(pgri);
	if (pgri->pgr_npages != npages)
		panic("uvm_pagermapout: partial unmapping not supported");
	LIST_REMOVE(pgri, pgr_entries);
	mutex_exit(&pagermtx);

	if (pgri->pgr_read) {
		for (i = 0, curkva = pgri->pgr_kva;
		    i < pgri->pgr_npages;
		    i++, curkva += PAGE_SIZE) {
			memcpy(pgri->pgr_pgs[i]->uanon,
			    (void *)curkva, PAGE_SIZE);
		}
	}

	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
	kmem_free((void *)pgri->pgr_kva, npages * PAGE_SIZE);
	kmem_free(pgri, sizeof(*pgri));
}

/*
 * convert va in pager window to page structure.
 * XXX: how expensive is this (global lock, list traversal)?
 */
struct vm_page *
uvm_pageratop(vaddr_t va)
{
	struct pagerinfo *pgri;
	struct vm_page *pg = NULL;
	int i;

	mutex_enter(&pagermtx);
	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
		if (pgri->pgr_kva <= va
		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
			break;
	}
	if (pgri) {
		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
		pg = pgri->pgr_pgs[i];
	}
	mutex_exit(&pagermtx);

	return pg;
}

/*
 * Called with the vm object locked.
 *
 * Put vnode object pages at the end of the access queue to indicate
 * they have been recently accessed and should not be immediate
 * candidates for pageout.  Do not do this for lookups done by
 * the pagedaemon to mimic pmap_kentered mappings which don't track
 * access information.
 */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
	struct vm_page *pg;
	bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;

	pg = rb_tree_find_node(&uobj->rb_tree, &off);
	if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
		mutex_enter(&uvm_pageqlock);
		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
		mutex_exit(&uvm_pageqlock);
	}

	return pg;
}

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;

	KASSERT(npgs > 0);
	KASSERT(mutex_owned(pgs[0]->uobject->vmobjlock));

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL)
			continue;

		KASSERT(pg->flags & PG_BUSY);
		if (pg->flags & PG_WANTED)
			wakeup(pg);
		if (pg->flags & PG_RELEASED)
			uvm_pagefree(pg);
		else
			pg->flags &= ~(PG_WANTED|PG_BUSY);
	}
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	/* XXX: guessing game */
	*active = 1024;
	*inactive = 1024;
}

bool
vm_map_starved_p(struct vm_map *map)
{

	if (map->flags & VM_MAP_WANTVA)
		return true;

	return false;
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

	panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

	panic("%s: unimplemented", __func__);
}

int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
	struct vm_page **opp)
{

	return EBUSY;
}

struct vm_page *
uvm_loanbreak(struct vm_page *pg)
{

	panic("%s: unimplemented", __func__);
}

void
ubc_purge(struct uvm_object *uobj)
{

}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
{

	return 0;
}

int
uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
	vm_prot_t prot, bool set_max)
{

	return EOPNOTSUPP;
}

/*
 * UVM km
 */

vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
	void *rv, *desired = NULL;
	int alignbit, error;

#ifdef __x86_64__
	/*
	 * On amd64, allocate all module memory from the lowest 2GB.
	 * This is because NetBSD kernel modules are compiled
	 * with -mcmodel=kernel and reserve only 4 bytes for
	 * offsets.  If we load code compiled with -mcmodel=kernel
	 * anywhere except the lowest or highest 2GB, it will not
	 * work.  Since userspace does not have access to the highest
	 * 2GB, use the lowest 2GB.
	 *
	 * Note: this assumes the rump kernel resides in
	 * the lowest 2GB as well.
	 *
	 * Note2: yes, it's a quick hack, but since this is the only
	 * place where we care about the map we're allocating from,
	 * just use a simple "if" instead of coming up with a fancy
	 * generic solution.
	 */
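	/*
	 * In other words (approximately): with -mcmodel=kernel the
	 * compiler emits sign-extended 32-bit displacements for symbol
	 * references, so module text and data must live somewhere such
	 * a displacement can reach, i.e. the lowest or highest 2GB.
	 */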
	if (map == module_map) {
		desired = (void *)(0x80000000 - size);
	}
#endif

	if (__predict_false(map == module_map)) {
		alignbit = 0;
		if (align) {
			alignbit = ffs(align)-1;
		}
		error = rumpuser_anonmmap(desired, size, alignbit,
		    flags & UVM_KMF_EXEC, &rv);
	} else {
		error = rumpuser_malloc(size, align, &rv);
	}

	if (error) {
		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
			return 0;
		else
			panic("uvm_km_alloc failed");
	}

	if (flags & UVM_KMF_ZERO)
		memset(rv, 0, size);

	return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

	if (__predict_false(map == module_map))
		rumpuser_unmap((void *)vaddr, size);
	else
		rumpuser_free((void *)vaddr, size);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
	vsize_t size, int pageable, bool fixed, struct vm_map *submap)
{

	return (struct vm_map *)417416;
}

int
uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
	vmem_addr_t *addr)
{
	vaddr_t va;
	va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE,
	    (flags & VM_SLEEP), "kmalloc");

	if (va) {
		*addr = va;
		return 0;
	} else {
		return ENOMEM;
	}
}

void
uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

	rump_hyperfree((void *)addr, size);
}

/*
 * VM space locking routines.  We don't really have to do anything,
 * since the pages are always "wired" (both local and remote processes).
 */
int
uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
{

	return 0;
}

void
uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
{

}

/*
 * For the local case the buffer mappers don't need to do anything.
 * For the remote case we need to reserve space and copy data in or
 * out, depending on B_READ/B_WRITE.
 */
int
vmapbuf(struct buf *bp, vsize_t len)
{
	int error = 0;

	bp->b_saveaddr = bp->b_data;

	/* remote case */
	if (!RUMP_LOCALPROC_P(curproc)) {
		bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
		if (BUF_ISWRITE(bp)) {
			error = copyin(bp->b_saveaddr, bp->b_data, len);
			if (error) {
				rump_hyperfree(bp->b_data, len);
				bp->b_data = bp->b_saveaddr;
				bp->b_saveaddr = 0;
			}
		}
	}

	return error;
}

void
vunmapbuf(struct buf *bp, vsize_t len)
{

	/* remote case */
	if (!RUMP_LOCALPROC_P(bp->b_proc)) {
		if (BUF_ISREAD(bp)) {
			bp->b_error = copyout_proc(bp->b_proc,
			    bp->b_data, bp->b_saveaddr, len);
		}
		rump_hyperfree(bp->b_data, len);
	}

	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = 0;
}
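
/*
 * To sketch the remote-client flow above in one place: for a write,
 * vmapbuf() copies the client's data into a locally allocated bounce
 * buffer before the driver sees it; for a read, the driver fills the
 * bounce buffer and vunmapbuf() pushes it back to the client with
 * copyout_proc().  Local clients keep using their own buffer directly.
 */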

void
uvmspace_addref(struct vmspace *vm)
{

	/*
	 * No dynamically allocated vmspaces exist.
	 */
}

void
uvmspace_free(struct vmspace *vm)
{

	/* nothing for now */
}

/*
 * page life cycle stuff.  it really doesn't exist, so just stubs.
 */

void
uvm_pageactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedeactivate(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pagedequeue(struct vm_page *pg)
{

	/* nada */
}

void
uvm_pageenqueue(struct vm_page *pg)
{

	/* nada */
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

	/* nada */
}

/*
 * Physical address accessors.
 */

struct vm_page *
uvm_phys_to_vm_page(paddr_t pa)
{

	return NULL;
}

paddr_t
uvm_vm_page_to_phys(const struct vm_page *pg)
{

	return 0;
}

vaddr_t
uvm_uarea_alloc(void)
{

	/* non-zero */
	return (vaddr_t)11;
}

void
uvm_uarea_free(vaddr_t uarea)
{

	/* nata, so creamy */
}

/*
 * Routines related to the Page Baroness.
 */

void
uvm_wait(const char *msg)
{

	if (__predict_false(rump_threads == 0))
		panic("pagedaemon missing (RUMP_THREADS = 0)");

	if (curlwp == uvm.pagedaemon_lwp) {
		/* is it possible for us to later get memory? */
		if (!uvmexp.paging)
			panic("pagedaemon out of memory");
	}

	mutex_enter(&pdaemonmtx);
	pdaemon_waiters++;
	cv_signal(&pdaemoncv);
	cv_wait(&oomwait, &pdaemonmtx);
	mutex_exit(&pdaemonmtx);
}

void
uvm_pageout_start(int npages)
{

	mutex_enter(&pdaemonmtx);
	uvmexp.paging += npages;
	mutex_exit(&pdaemonmtx);
}

void
uvm_pageout_done(int npages)
{

	if (!npages)
		return;

	mutex_enter(&pdaemonmtx);
	KASSERT(uvmexp.paging >= npages);
	uvmexp.paging -= npages;

	if (pdaemon_waiters) {
		pdaemon_waiters = 0;
		cv_broadcast(&oomwait);
	}
	mutex_exit(&pdaemonmtx);
}

static bool
processpage(struct vm_page *pg, bool *lockrunning)
{
	struct uvm_object *uobj;

	uobj = pg->uobject;
	if (mutex_tryenter(uobj->vmobjlock)) {
		if ((pg->flags & PG_BUSY) == 0) {
			mutex_exit(&uvm_pageqlock);
			uobj->pgops->pgo_put(uobj, pg->offset,
			    pg->offset + PAGE_SIZE,
			    PGO_CLEANIT|PGO_FREE);
			KASSERT(!mutex_owned(uobj->vmobjlock));
			return true;
		} else {
			mutex_exit(uobj->vmobjlock);
		}
	} else if (*lockrunning == false && ncpu > 1) {
		CPU_INFO_ITERATOR cii;
		struct cpu_info *ci;
		struct lwp *l;

		l = mutex_owner(uobj->vmobjlock);
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (ci->ci_curlwp == l) {
				*lockrunning = true;
				break;
			}
		}
	}

	return false;
}

/*
 * The Diabolical pageDaemon Director (DDD).
 *
 * This routine can always use better heuristics.
 */
void
uvm_pageout(void *arg)
{
	struct vm_page *pg;
	struct pool *pp, *pp_first;
	int cleaned, skip, skipped;
	bool succ;
	bool lockrunning;

	mutex_enter(&pdaemonmtx);
	for (;;) {
		if (!NEED_PAGEDAEMON()) {
			kernel_map->flags &= ~VM_MAP_WANTVA;
		}

		if (pdaemon_waiters) {
			pdaemon_waiters = 0;
			cv_broadcast(&oomwait);
		}

		cv_wait(&pdaemoncv, &pdaemonmtx);
		uvmexp.pdwoke++;

		/* tell the world that we are hungry */
		kernel_map->flags |= VM_MAP_WANTVA;
		mutex_exit(&pdaemonmtx);
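
		/*
		 * Reclamation below proceeds in escalating steps,
		 * roughly: 1) reclaim the page cache pool, 2) push out
		 * vnode pages via pgo_put, 3) reclaim the page cache
		 * again, and 4) drain the buffer cache and the pools.
		 * If none of that helps and nobody is paging, sleep a
		 * while and hope something shakes loose.
		 */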

		/*
		 * step one: reclaim the page cache.  this should give
		 * us the biggest earnings since whole pages are released
		 * into backing memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			mutex_enter(&pdaemonmtx);
			continue;
		}

		/*
		 * Ok, so that didn't help.  Next, try to hunt memory
		 * by pushing out vnode pages.  The pages might contain
		 * useful cached data, but we need the memory.
		 */
		cleaned = 0;
		skip = 0;
		lockrunning = false;
 again:
		mutex_enter(&uvm_pageqlock);
		while (cleaned < PAGEDAEMON_OBJCHUNK) {
			skipped = 0;
			TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {

				/*
				 * skip over pages we _might_ have tried
				 * to handle earlier.  they might not be
				 * exactly the same ones, but I'm not too
				 * concerned.
				 */
				while (skipped++ < skip)
					continue;

				if (processpage(pg, &lockrunning)) {
					cleaned++;
					goto again;
				}

				skip++;
			}
			break;
		}
		mutex_exit(&uvm_pageqlock);

		/*
		 * Ok, someone is running with an object lock held.
		 * We want to yield the host CPU to make sure the
		 * thread is not parked on the host.  Since sched_yield()
		 * doesn't appear to do anything on NetBSD, nanosleep
		 * for the smallest possible time and hope we're back in
		 * the game soon.
		 */
		if (cleaned == 0 && lockrunning) {
			rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 0, 1);

			lockrunning = false;
			skip = 0;

			/* and here we go again */
			goto again;
		}

		/*
		 * And of course we need to reclaim the page cache
		 * again to actually release memory.
		 */
		pool_cache_reclaim(&pagecache);
		if (!NEED_PAGEDAEMON()) {
			mutex_enter(&pdaemonmtx);
			continue;
		}

		/*
		 * And then drain the pools.  Wipe them out ... all of them.
		 */
		for (pp_first = NULL;;) {
			rump_vfs_drainbufs(10 /* XXX: estimate! */);

			succ = pool_drain(&pp);
			if (succ || pp == pp_first)
				break;

			if (pp_first == NULL)
				pp_first = pp;
		}

		/*
		 * Need to use PYEC on our bag of tricks.
		 * Unfortunately, the wife just borrowed it.
		 */

		mutex_enter(&pdaemonmtx);
		if (!succ && cleaned == 0 && pdaemon_waiters &&
		    uvmexp.paging == 0) {
			rumpuser_dprintf("pagedaemoness: failed to reclaim "
			    "memory ... sleeping (deadlock?)\n");
			cv_timedwait(&pdaemoncv, &pdaemonmtx, hz);
		}
	}

	panic("you can swap out any time you like, but you can never leave");
}

void
uvm_kick_pdaemon()
{

	/*
	 * Wake up the diabolical pagedaemon director if we are over
	 * 90% of the memory limit.  This is a complete and utter
	 * stetson-harrison decision which you are allowed to finetune.
	 * Don't bother locking.  If we have some unflushed caches,
	 * other waker-uppers will deal with the issue.
	 */
	if (NEED_PAGEDAEMON()) {
		cv_signal(&pdaemoncv);
	}
}
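
/*
 * Memory hypercall wrappers.  An allocation is accounted against the
 * byte-granular limit first and only then forwarded to the host; the
 * pagedaemon is checked against the larger pdlimit so it can keep
 * allocating out of the PDRESERVE slice while it frees memory for
 * everybody else.
 */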

void *
rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
{
	const unsigned long thelimit =
	    curlwp == uvm.pagedaemon_lwp ? pdlimit : rump_physmemlimit;
	unsigned long newmem;
	void *rv;
	int error;

	uvm_kick_pdaemon(); /* ouch */

	/* first we must be within the limit */
 limitagain:
	if (thelimit != RUMPMEM_UNLIMITED) {
		newmem = atomic_add_long_nv(&curphysmem, howmuch);
		if (newmem > thelimit) {
			newmem = atomic_add_long_nv(&curphysmem, -howmuch);
			if (!waitok) {
				return NULL;
			}
			uvm_wait(wmsg);
			goto limitagain;
		}
	}

	/* second, we must get something from the backend */
 again:
	error = rumpuser_malloc(howmuch, alignment, &rv);
	if (__predict_false(error && waitok)) {
		uvm_wait(wmsg);
		goto again;
	}

	return rv;
}

void
rump_hyperfree(void *what, size_t size)
{

	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
		atomic_add_long(&curphysmem, -size);
	}
	rumpuser_free(what, size);
}