xref: /netbsd-src/sys/rump/librump/rumpkern/vm.c (revision 3fd84419c8d5cd261e662e70579e6824ec1b7953)
1 /*	$NetBSD: vm.c,v 1.197 2023/09/24 09:33:26 martin Exp $	*/
2 
3 /*
4  * Copyright (c) 2007-2011 Antti Kantee.  All Rights Reserved.
5  *
6  * Development of this software was supported by
7  * The Finnish Cultural Foundation and the Research Foundation of
8  * The Helsinki University of Technology.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*
33  * Virtual memory emulation routines.
34  */
35 
36 /*
37  * XXX: we abuse pg->uanon for the virtual address of the storage
38  * for each page.  phys_addr would fit the job description better,
39  * except that it will create unnecessary lossage on some platforms
40  * due to not being a pointer type.
41  */
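
/*
 * For instance, uvm_pagezero() below simply memset()s the hypercall
 * buffer hanging off pg->uanon:
 *
 *	memset((void *)pg->uanon, 0, PAGE_SIZE);
 */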
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.197 2023/09/24 09:33:26 martin Exp $");
45 
46 #include <sys/param.h>
47 #include <sys/atomic.h>
48 #include <sys/buf.h>
49 #include <sys/kernel.h>
50 #include <sys/kmem.h>
51 #include <sys/vmem.h>
52 #include <sys/mman.h>
53 #include <sys/null.h>
54 #include <sys/vnode.h>
55 #include <sys/radixtree.h>
56 #include <sys/module.h>
57 
58 #include <machine/pmap.h>
59 
60 #if defined(__i386__) || defined(__x86_64__)
61 /*
62  * This file abuses the pmap abstraction to create its own statically
63  * allocated struct pmap object, even though it can't do anything
64  * useful with such a thing from userland.  On x86 the struct pmap
65  * definition is private, so we have to go to extra effort to abuse it
66  * there.  This should be fixed -- all of the struct pmap definitions
67  * should be private, and then rump can furnish its own fake struct
68  * pmap without clashing with anything.
69  */
70 #include <machine/pmap_private.h>
71 #endif
72 
73 #include <uvm/uvm.h>
74 #include <uvm/uvm_ddb.h>
75 #include <uvm/uvm_pdpolicy.h>
76 #include <uvm/uvm_prot.h>
77 #include <uvm/uvm_readahead.h>
78 #include <uvm/uvm_device.h>
79 
80 #include <rump-sys/kern.h>
81 #include <rump-sys/vfs.h>
82 
83 #include <rump/rumpuser.h>
84 
85 kmutex_t vmpage_lruqueue_lock; /* non-free page lock */
86 kmutex_t uvm_swap_data_lock;
87 
88 struct uvmexp uvmexp;
89 struct uvm uvm;
90 
91 #ifdef __uvmexp_pagesize
92 const int * const uvmexp_pagesize = &uvmexp.pagesize;
93 const int * const uvmexp_pagemask = &uvmexp.pagemask;
94 const int * const uvmexp_pageshift = &uvmexp.pageshift;
95 #endif
96 
97 static struct vm_map kernel_map_store;
98 struct vm_map *kernel_map = &kernel_map_store;
99 
100 static struct vm_map module_map_store;
101 
102 static struct pmap pmap_kernel;
103 struct pmap rump_pmap_local;
104 struct pmap *const kernel_pmap_ptr = &pmap_kernel;
105 
106 vmem_t *kmem_arena;
107 vmem_t *kmem_va_arena;
108 
109 static unsigned int pdaemon_waiters;
110 static kmutex_t pdaemonmtx;
111 static kcondvar_t pdaemoncv, oomwait;
112 
113 /* all local non-proc0 processes share this vmspace */
114 struct vmspace *rump_vmspace_local;
115 
116 unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
117 static unsigned long pdlimit = RUMPMEM_UNLIMITED; /* page daemon memlimit */
118 static unsigned long curphysmem;
119 static unsigned long dddlim;		/* 90% of memory limit used */
120 #define NEED_PAGEDAEMON() \
121     (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
122 #define PDRESERVE (2*MAXPHYS)
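
/*
 * Example: with RUMP_MEMLIMIT=16m the hypercall allocator may use
 * 16MB in total.  PDRESERVE (2*MAXPHYS, i.e. 128kB assuming the
 * common 64kB MAXPHYS) of that is reserved for the pagedaemon,
 * and dddlim ends up at 90% of the remaining ~15.9MB, so the
 * pagedaemon starts working at roughly 14.3MB of allocated memory.
 */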
123 
124 /*
125  * Try to free two pages worth of pages from objects.
126  * If this successfully frees a full page cache page, we'll
127  * free the released page plus PAGE_SIZE/sizeof(vm_page).
128  */
129 #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))
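
/*
 * E.g. with 4kB pages, and assuming struct vm_page is on the order
 * of 128 bytes, PAGEDAEMON_OBJCHUNK works out to roughly 64 pages
 * per pageout round.
 */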
130 
131 /*
132  * Keep a list of least recently used pages.  Since the only way a
133  * rump kernel can "access" a page is via lookup, we put the page
134  * at the back of queue every time a lookup for it is done.  If the
135  * page is in front of this global queue and we're short of memory,
136  * it's a candidate for pageout.
137  */
138 static struct pglist vmpage_lruqueue;
139 static unsigned vmpage_onqueue;
140 
141 /*
142  * vm pages
143  */
144 
145 static int
146 pgctor(void *arg, void *obj, int flags)
147 {
148 	struct vm_page *pg = obj;
149 
150 	memset(pg, 0, sizeof(*pg));
151 	pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
152 	    (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
153 	return pg->uanon == NULL;
154 }
155 
156 static void
157 pgdtor(void *arg, void *obj)
158 {
159 	struct vm_page *pg = obj;
160 
161 	rump_hyperfree(pg->uanon, PAGE_SIZE);
162 }
163 
164 static struct pool_cache pagecache;
165 
166 /* stub for UVM_OBJ_IS_VNODE */
167 struct uvm_pagerops rump_uvm_vnodeops;
168 __weak_alias(uvm_vnodeops,rump_uvm_vnodeops);
169 
170 /*
171  * Called with the object locked.  We don't support anons.
172  */
173 struct vm_page *
174 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
175 	int flags, int strat, int free_list)
176 {
177 	struct vm_page *pg;
178 
179 	KASSERT(uobj && rw_write_held(uobj->vmobjlock));
180 	KASSERT(anon == NULL);
181 
182 	pg = pool_cache_get(&pagecache, PR_NOWAIT);
183 	if (__predict_false(pg == NULL)) {
184 		return NULL;
185 	}
186 	mutex_init(&pg->interlock, MUTEX_DEFAULT, IPL_NONE);
187 
188 	pg->offset = off;
189 	pg->uobject = uobj;
190 
191 	if (radix_tree_insert_node(&uobj->uo_pages, off >> PAGE_SHIFT,
192 	    pg) != 0) {
193 		pool_cache_put(&pagecache, pg);
194 		return NULL;
195 	}
196 
197 	if (UVM_OBJ_IS_VNODE(uobj)) {
198 		if (uobj->uo_npages == 0) {
199 			struct vnode *vp = (struct vnode *)uobj;
200 			mutex_enter(vp->v_interlock);
201 			vp->v_iflag |= VI_PAGES;
202 			mutex_exit(vp->v_interlock);
203 		}
204 		pg->flags |= PG_FILE;
205 	}
206 	uobj->uo_npages++;
207 
208 	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
209 	if (flags & UVM_PGA_ZERO) {
210 		uvm_pagezero(pg);
211 	}
212 
213 	/*
214 	 * Don't put anons on the LRU page queue.  We can't flush them
215 	 * (there's no concept of swap in a rump kernel), so no reason
216 	 * to bother with them.
217 	 */
218 	if (!UVM_OBJ_IS_AOBJ(uobj)) {
219 		atomic_inc_uint(&vmpage_onqueue);
220 		mutex_enter(&vmpage_lruqueue_lock);
221 		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
222 		mutex_exit(&vmpage_lruqueue_lock);
223 	} else {
224 		pg->flags |= PG_AOBJ;
225 	}
226 
227 	return pg;
228 }
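
/*
 * A minimal usage sketch (uvm_pagealloc() is the usual wrapper for
 * the "strat" variant): allocate with the object lock held and,
 * since this version never sleeps, retry via uvm_wait() on failure:
 *
 *	rw_enter(uobj->vmobjlock, RW_WRITER);
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, 0)) == NULL) {
 *		rw_exit(uobj->vmobjlock);
 *		uvm_wait("pgalloc");
 *		rw_enter(uobj->vmobjlock, RW_WRITER);
 *	}
 */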
229 
230 /*
231  * Release a page.
232  *
233  * Called with the vm object locked.
234  */
235 void
236 uvm_pagefree(struct vm_page *pg)
237 {
238 	struct uvm_object *uobj = pg->uobject;
239 	struct vm_page *pg2 __unused;
240 
241 	KASSERT(rw_write_held(uobj->vmobjlock));
242 
243 	mutex_enter(&pg->interlock);
244 	uvm_pagewakeup(pg);
245 	mutex_exit(&pg->interlock);
246 
247 	uobj->uo_npages--;
248 	pg2 = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
249 	KASSERT(pg == pg2);
250 
251 	if (!UVM_OBJ_IS_AOBJ(uobj)) {
252 		mutex_enter(&vmpage_lruqueue_lock);
253 		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
254 		mutex_exit(&vmpage_lruqueue_lock);
255 		atomic_dec_uint(&vmpage_onqueue);
256 	}
257 
258 	if (UVM_OBJ_IS_VNODE(uobj) && uobj->uo_npages == 0) {
259 		struct vnode *vp = (struct vnode *)uobj;
260 		mutex_enter(vp->v_interlock);
261 		vp->v_iflag &= ~VI_PAGES;
262 		mutex_exit(vp->v_interlock);
263 	}
264 
265 	mutex_destroy(&pg->interlock);
266 	pool_cache_put(&pagecache, pg);
267 }
268 
269 void
270 uvm_pagezero(struct vm_page *pg)
271 {
272 
273 	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
274 	memset((void *)pg->uanon, 0, PAGE_SIZE);
275 }
276 
277 /*
278  * uvm_page_owner_locked_p: return true if object associated with page is
279  * locked.  this is a weak check for runtime assertions only.
280  */
281 
282 bool
283 uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive)
284 {
285 
286 	if (exclusive)
287 		return rw_write_held(pg->uobject->vmobjlock);
288 	else
289 		return rw_lock_held(pg->uobject->vmobjlock);
290 }
291 
292 /*
293  * Misc routines
294  */
295 
296 static kmutex_t pagermtx;
297 
298 void
299 uvm_init(void)
300 {
301 	char buf[64];
302 
303 	if (rumpuser_getparam("RUMP_MEMLIMIT", buf, sizeof(buf)) == 0) {
304 		unsigned long tmp;
305 		char *ep;
306 		int mult;
307 
308 		tmp = strtoul(buf, &ep, 10);
309 		if (strlen(ep) > 1)
310 			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
311 
312 		/* mini-dehumanize-number */
313 		mult = 1;
314 		switch (*ep) {
315 		case 'k':
316 			mult = 1024;
317 			break;
318 		case 'm':
319 			mult = 1024*1024;
320 			break;
321 		case 'g':
322 			mult = 1024*1024*1024;
323 			break;
324 		case 0:
325 			break;
326 		default:
327 			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
328 		}
329 		rump_physmemlimit = tmp * mult;
330 
331 		if (rump_physmemlimit / mult != tmp)
332 			panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);
333 
334 		/* reserve some memory for the pager */
335 		if (rump_physmemlimit <= PDRESERVE)
336 			panic("uvm_init: system reserves %d bytes of mem, "
337 			    "only %lu bytes given",
338 			    PDRESERVE, rump_physmemlimit);
339 		pdlimit = rump_physmemlimit;
340 		rump_physmemlimit -= PDRESERVE;
341 
342 		if (pdlimit < 1024*1024)
343 			printf("uvm_init: WARNING: <1MB RAM limit, "
344 			    "hope you know what you're doing\n");
345 
346 #define HUMANIZE_BYTES 9
347 		CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
348 		format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
349 #undef HUMANIZE_BYTES
350 		dddlim = 9 * (rump_physmemlimit / 10);
351 	} else {
352 		strlcpy(buf, "unlimited (host limit)", sizeof(buf));
353 	}
354 	aprint_verbose("total memory = %s\n", buf);
355 
356 	TAILQ_INIT(&vmpage_lruqueue);
357 
358 	if (rump_physmemlimit == RUMPMEM_UNLIMITED) {
359 		uvmexp.npages = physmem;
360 	} else {
361 		uvmexp.npages = pdlimit >> PAGE_SHIFT;
362 		uvmexp.reserve_pagedaemon = PDRESERVE >> PAGE_SHIFT;
363 		uvmexp.freetarg = (rump_physmemlimit-dddlim) >> PAGE_SHIFT;
364 	}
365 	/*
366 	 * uvmexp.free is not used internally or updated.  The reason is
367 	 * that the memory hypercall allocator is allowed to allocate
368 	 * non-page sized chunks.  We use a byte count in curphysmem
369 	 * instead.
370 	 */
371 	uvmexp.free = uvmexp.npages;
372 
373 #ifndef __uvmexp_pagesize
374 	uvmexp.pagesize = PAGE_SIZE;
375 	uvmexp.pagemask = PAGE_MASK;
376 	uvmexp.pageshift = PAGE_SHIFT;
377 #else
378 	uvmexp.pagesize = rumpuser_getpagesize();
379 	uvmexp.pagemask = uvmexp.pagesize-1;
380 	uvmexp.pageshift = ffs(uvmexp.pagesize)-1;
381 #endif
382 
383 	mutex_init(&pagermtx, MUTEX_DEFAULT, IPL_NONE);
384 	mutex_init(&vmpage_lruqueue_lock, MUTEX_DEFAULT, IPL_NONE);
385 	mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE);
386 	mutex_init(&pdaemonmtx, MUTEX_DEFAULT, IPL_NONE);
387 
388 	cv_init(&pdaemoncv, "pdaemon");
389 	cv_init(&oomwait, "oomwait");
390 
391 	module_map = &module_map_store;
392 
393 	kernel_map->pmap = pmap_kernel();
394 
395 	pool_subsystem_init();
396 
397 	kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE,
398 	    NULL, NULL, NULL,
399 	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
400 
401 	vmem_subsystem_init(kmem_arena);
402 
403 	kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE,
404 	    vmem_alloc, vmem_free, kmem_arena,
405 	    8 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
406 
407 	pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
408 	    "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
409 
410 	radix_tree_init();
411 
412 	/* create vmspace used by local clients */
413 	rump_vmspace_local = kmem_zalloc(sizeof(*rump_vmspace_local), KM_SLEEP);
414 	uvmspace_init(rump_vmspace_local, &rump_pmap_local, 0, 0, false);
415 }
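
/*
 * RUMP_MEMLIMIT is read from the host environment via
 * rumpuser_getparam(), so a typical invocation looks roughly like:
 *
 *	$ env RUMP_MEMLIMIT=32m ./rumpclient	(32MB limit)
 *	$ env RUMP_MEMLIMIT=1g  ./rumpclient	(1GB limit)
 *
 * Only a single 'k'/'m'/'g' suffix is accepted; anything else
 * panics above.
 */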
416 
417 void
418 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax,
419     bool topdown)
420 {
421 
422 	vm->vm_map.pmap = pmap;
423 	vm->vm_refcnt = 1;
424 }
425 
426 int
427 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
428     bool new_pageable, int lockflags)
429 {
430 	return 0;
431 }
432 
433 void
434 uvm_pagewire(struct vm_page *pg)
435 {
436 
437 	/* nada */
438 }
439 
440 void
441 uvm_pageunwire(struct vm_page *pg)
442 {
443 
444 	/* nada */
445 }
446 
447 int
448 uvm_availmem(bool cached)
449 {
450 
451 	return uvmexp.free;
452 }
453 
454 void
455 uvm_pagelock(struct vm_page *pg)
456 {
457 
458 	mutex_enter(&pg->interlock);
459 }
460 
461 void
462 uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2)
463 {
464 
465 	if (pg1 < pg2) {
466 		mutex_enter(&pg1->interlock);
467 		mutex_enter(&pg2->interlock);
468 	} else {
469 		mutex_enter(&pg2->interlock);
470 		mutex_enter(&pg1->interlock);
471 	}
472 }
473 
474 void
475 uvm_pageunlock(struct vm_page *pg)
476 {
477 
478 	mutex_exit(&pg->interlock);
479 }
480 
481 void
482 uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2)
483 {
484 
485 	mutex_exit(&pg1->interlock);
486 	mutex_exit(&pg2->interlock);
487 }
488 
489 /* where's your schmonz now? */
490 #define PUNLIMIT(a)	\
491 p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
492 void
493 uvm_init_limits(struct proc *p)
494 {
495 
496 #ifndef DFLSSIZ
497 #define DFLSSIZ (16*1024*1024)
498 #endif
499 	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
500 	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
501 	PUNLIMIT(RLIMIT_DATA);
502 	PUNLIMIT(RLIMIT_RSS);
503 	PUNLIMIT(RLIMIT_AS);
504 	/* nice, cascade */
505 }
506 #undef PUNLIMIT
507 
508 /*
509  * This satisfies the "disgusting mmap hack" used by proplib.
510  */
511 int
512 uvm_mmap_anon(struct proc *p, void **addrp, size_t size)
513 {
514 	int error;
515 
516 	/* no reason in particular, but cf. uvm_default_mapaddr() */
517 	if (*addrp != NULL)
518 		panic("uvm_mmap() variant unsupported");
519 
520 	if (RUMP_LOCALPROC_P(curproc)) {
521 		error = rumpuser_anonmmap(NULL, size, 0, 0, addrp);
522 	} else {
523 		error = rump_sysproxy_anonmmap(RUMP_SPVM2CTL(p->p_vmspace),
524 		    size, addrp);
525 	}
526 	return error;
527 }
528 
529 /*
530  * Stubs for things referenced from vfs_vnode.c but not used.
531  */
532 const dev_t zerodev;
533 
534 struct uvm_object *
535 udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size)
536 {
537 	return NULL;
538 }
539 
540 struct pagerinfo {
541 	vaddr_t pgr_kva;
542 	int pgr_npages;
543 	struct vm_page **pgr_pgs;
544 	bool pgr_read;
545 
546 	LIST_ENTRY(pagerinfo) pgr_entries;
547 };
548 static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
549 
550 /*
551  * Pager "map" in routine.  Instead of mapping, we allocate memory
552  * and copy page contents there.  The reason for copying instead of
553  * mapping is simple: we do not assume we are running on virtual
554  * memory.  Even if we could emulate virtual memory in some envs
555  * such as userspace, copying is much faster than trying to awkwardly
556  * cope with remapping (see "Design and Implementation" pp.95-98).
557  * The downside of the approach is that the pager requires MAXPHYS
558  * free memory to perform paging, but short of virtual memory or
559  * making the pager do I/O in page-sized chunks we cannot do much
560  * about that.
561  */
562 vaddr_t
563 uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
564 {
565 	struct pagerinfo *pgri;
566 	vaddr_t curkva;
567 	int i;
568 
569 	/* allocate structures */
570 	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
571 	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
572 	pgri->pgr_npages = npages;
573 	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
574 	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
575 
576 	/* copy contents to "mapped" memory */
577 	for (i = 0, curkva = pgri->pgr_kva;
578 	    i < npages;
579 	    i++, curkva += PAGE_SIZE) {
580 		/*
581 		 * We need to copy the previous contents of the pages to
582 		 * the window even if we are reading from the
583 		 * device, since the device might not fill the contents of
584 		 * the full mapped range and we will end up corrupting
585 		 * data when we unmap the window.
586 		 */
587 		memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
588 		pgri->pgr_pgs[i] = pgs[i];
589 	}
590 
591 	mutex_enter(&pagermtx);
592 	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
593 	mutex_exit(&pagermtx);
594 
595 	return pgri->pgr_kva;
596 }
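
/*
 * Sketch of how the window is meant to be used by a pager:
 *
 *	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_READ);
 *	(do the I/O against kva)
 *	uvm_pagermapout(kva, npages);
 *
 * For reads it is the mapout step that copies the data from the
 * malloc'd window back into the per-page storage.
 */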
597 
598 /*
599  * map out the pager window.  return contents from VA to page storage
600  * and free structures.
601  *
602  * Note: does not currently support partial frees
603  */
604 void
605 uvm_pagermapout(vaddr_t kva, int npages)
606 {
607 	struct pagerinfo *pgri;
608 	vaddr_t curkva;
609 	int i;
610 
611 	mutex_enter(&pagermtx);
612 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
613 		if (pgri->pgr_kva == kva)
614 			break;
615 	}
616 	KASSERT(pgri);
617 	if (pgri->pgr_npages != npages)
618 		panic("uvm_pagermapout: partial unmapping not supported");
619 	LIST_REMOVE(pgri, pgr_entries);
620 	mutex_exit(&pagermtx);
621 
622 	if (pgri->pgr_read) {
623 		for (i = 0, curkva = pgri->pgr_kva;
624 		    i < pgri->pgr_npages;
625 		    i++, curkva += PAGE_SIZE) {
626 			memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
627 		}
628 	}
629 
630 	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
631 	kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
632 	kmem_free(pgri, sizeof(*pgri));
633 }
634 
635 /*
636  * convert va in pager window to page structure.
637  * XXX: how expensive is this (global lock, list traversal)?
638  */
639 struct vm_page *
640 uvm_pageratop(vaddr_t va)
641 {
642 	struct pagerinfo *pgri;
643 	struct vm_page *pg = NULL;
644 	int i;
645 
646 	mutex_enter(&pagermtx);
647 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
648 		if (pgri->pgr_kva <= va
649 		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
650 			break;
651 	}
652 	if (pgri) {
653 		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
654 		pg = pgri->pgr_pgs[i];
655 	}
656 	mutex_exit(&pagermtx);
657 
658 	return pg;
659 }
660 
661 /*
662  * Called with the vm object locked.
663  *
664  * Put vnode object pages at the end of the access queue to indicate
665  * they have been recently accessed and should not be immediate
666  * candidates for pageout.  Do not do this for lookups done by
667  * the pagedaemon to mimic pmap_kentered mappings which don't track
668  * access information.
669  */
670 struct vm_page *
671 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
672 {
673 	struct vm_page *pg;
674 	bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;
675 
676 	pg = radix_tree_lookup_node(&uobj->uo_pages, off >> PAGE_SHIFT);
677 	if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
678 		mutex_enter(&vmpage_lruqueue_lock);
679 		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
680 		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
681 		mutex_exit(&vmpage_lruqueue_lock);
682 	}
683 
684 	return pg;
685 }
686 
687 void
688 uvm_page_unbusy(struct vm_page **pgs, int npgs)
689 {
690 	struct vm_page *pg;
691 	int i, pageout_done;
692 
693 	KASSERT(npgs > 0);
694 
695 	pageout_done = 0;
696 	for (i = 0; i < npgs; i++) {
697 		pg = pgs[i];
698 		if (pg == NULL || pg == PGO_DONTCARE) {
699 			continue;
700 		}
701 
702 #if 0
703 		KASSERT(uvm_page_owner_locked_p(pg, true));
704 #else
705 		/*
706 		 * uvm_page_owner_locked_p() is not available in rump,
707 		 * and rump doesn't support amaps anyway.
708 		 */
709 		KASSERT(rw_write_held(pg->uobject->vmobjlock));
710 #endif
711 		KASSERT(pg->flags & PG_BUSY);
712 
713 		if (pg->flags & PG_PAGEOUT) {
714 			pg->flags &= ~PG_PAGEOUT;
715 			pg->flags |= PG_RELEASED;
716 			pageout_done++;
717 			atomic_inc_uint(&uvmexp.pdfreed);
718 		}
719 		if (pg->flags & PG_RELEASED) {
720 			KASSERT(pg->uobject != NULL ||
721 			    (pg->uanon != NULL && pg->uanon->an_ref > 0));
722 			pg->flags &= ~PG_RELEASED;
723 			uvm_pagefree(pg);
724 		} else {
725 			KASSERT((pg->flags & PG_FAKE) == 0);
726 			pg->flags &= ~PG_BUSY;
727 			uvm_pagelock(pg);
728 			uvm_pagewakeup(pg);
729 			uvm_pageunlock(pg);
730 			UVM_PAGE_OWN(pg, NULL);
731 		}
732 	}
733 	if (pageout_done != 0) {
734 		uvm_pageout_done(pageout_done);
735 	}
736 }
737 
738 void
739 uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg)
740 {
741 
742 	KASSERT(rw_lock_held(lock));
743 	KASSERT((pg->flags & PG_BUSY) != 0);
744 
745 	mutex_enter(&pg->interlock);
746 	pg->pqflags |= PQ_WANTED;
747 	rw_exit(lock);
748 	UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0);
749 }
750 
751 void
752 uvm_pagewakeup(struct vm_page *pg)
753 {
754 
755 	KASSERT(mutex_owned(&pg->interlock));
756 
757 	if ((pg->pqflags & PQ_WANTED) != 0) {
758 		pg->pqflags &= ~PQ_WANTED;
759 		wakeup(pg);
760 	}
761 }
762 
763 void
764 uvm_estimatepageable(int *active, int *inactive)
765 {
766 
767 	/* XXX: guessing game */
768 	*active = 1024;
769 	*inactive = 1024;
770 }
771 
772 int
773 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
774 {
775 
776 	panic("%s: unimplemented", __func__);
777 }
778 
779 void
780 uvm_unloan(void *v, int npages, int flags)
781 {
782 
783 	panic("%s: unimplemented", __func__);
784 }
785 
786 int
787 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
788 	struct vm_page **opp)
789 {
790 
791 	return EBUSY;
792 }
793 
794 struct vm_page *
795 uvm_loanbreak(struct vm_page *pg)
796 {
797 
798 	panic("%s: unimplemented", __func__);
799 }
800 
801 void
802 ubc_purge(struct uvm_object *uobj)
803 {
804 
805 }
806 
807 vaddr_t
808 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
809 {
810 
811 	return 0;
812 }
813 
814 int
815 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
816 	vm_prot_t prot, bool set_max)
817 {
818 
819 	return EOPNOTSUPP;
820 }
821 
822 int
823 uvm_map(struct vm_map *map, vaddr_t *startp, vsize_t size,
824     struct uvm_object *uobj, voff_t uoffset, vsize_t align,
825     uvm_flag_t flags)
826 {
827 
828 	*startp = (vaddr_t)rump_hypermalloc(size, align, true, "uvm_map");
829 	return *startp != 0 ? 0 : ENOMEM;
830 }
831 
832 void
833 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
834 {
835 
836 	rump_hyperfree((void*)start, end-start);
837 }
838 
839 
840 /*
841  * UVM km
842  */
843 
844 vaddr_t
845 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
846 {
847 	void *rv, *desired = NULL;
848 	int alignbit, error;
849 
850 #ifdef __x86_64__
851 	/*
852 	 * On amd64, allocate all module memory from the lowest 2GB.
853 	 * This is because NetBSD kernel modules are compiled
854 	 * with -mcmodel=kernel and reserve only 4 bytes for
855 	 * offsets.  If we load code compiled with -mcmodel=kernel
856 	 * anywhere except the lowest or highest 2GB, it will not
857 	 * work.  Since userspace does not have access to the highest
858 	 * 2GB, use the lowest 2GB.
859 	 *
860 	 * Note: this assumes the rump kernel resides in
861 	 * the lowest 2GB as well.
862 	 *
863  * Note2: yes, it's a quick hack, but since this is the only
864 	 * place where we care about the map we're allocating from,
865 	 * just use a simple "if" instead of coming up with a fancy
866 	 * generic solution.
867 	 */
868 	if (map == module_map) {
869 		desired = (void *)(0x80000000 - size);
870 	}
871 #endif
872 
873 	if (__predict_false(map == module_map)) {
874 		alignbit = 0;
875 		if (align) {
876 			alignbit = ffs(align)-1;
877 		}
878 		error = rumpuser_anonmmap(desired, size, alignbit,
879 		    flags & UVM_KMF_EXEC, &rv);
880 	} else {
881 		error = rumpuser_malloc(size, align, &rv);
882 	}
883 
884 	if (error) {
885 		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
886 			return 0;
887 		else
888 			panic("uvm_km_alloc failed");
889 	}
890 
891 	if (flags & UVM_KMF_ZERO)
892 		memset(rv, 0, size);
893 
894 	return (vaddr_t)rv;
895 }
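
/*
 * For example, a 64kB module_map allocation with 4kB alignment
 * becomes rumpuser_anonmmap(hint, 65536, 12, exec, &rv), where on
 * amd64 the hint places the mapping just below the 2GB boundary
 * (0x80000000 - 65536).
 */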
896 
897 void
898 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
899 {
900 
901 	if (__predict_false(map == module_map))
902 		rumpuser_unmap((void *)vaddr, size);
903 	else
904 		rumpuser_free((void *)vaddr, size);
905 }
906 
907 int
908 uvm_km_protect(struct vm_map *map, vaddr_t vaddr, vsize_t size, vm_prot_t prot)
909 {
910 	return 0;
911 }
912 
913 struct vm_map *
914 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
915 	vsize_t size, int pageable, bool fixed, struct vm_map *submap)
916 {
917 
918 	return (struct vm_map *)417416;
919 }
920 
921 int
922 uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
923     vmem_addr_t *addr)
924 {
925 	vaddr_t va;
926 	va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE,
927 	    (flags & VM_SLEEP), "kmalloc");
928 
929 	if (va) {
930 		*addr = va;
931 		return 0;
932 	} else {
933 		return ENOMEM;
934 	}
935 }
936 
937 void
938 uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
939 {
940 
941 	rump_hyperfree((void *)addr, size);
942 }
943 
944 /*
945  * VM space locking routines.  We don't really have to do anything,
946  * since the pages are always "wired" (both local and remote processes).
947  */
948 int
949 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
950 {
951 
952 	return 0;
953 }
954 
955 void
956 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
957 {
958 
959 }
960 
961 /*
962  * For the local case the buffer mappers don't need to do anything.
963  * For the remote case we need to reserve space and copy data in or
964  * out, depending on B_READ/B_WRITE.
965  */
966 int
967 vmapbuf(struct buf *bp, vsize_t len)
968 {
969 	int error = 0;
970 
971 	bp->b_saveaddr = bp->b_data;
972 
973 	/* remote case */
974 	if (!RUMP_LOCALPROC_P(curproc)) {
975 		bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
976 		if (BUF_ISWRITE(bp)) {
977 			error = copyin(bp->b_saveaddr, bp->b_data, len);
978 			if (error) {
979 				rump_hyperfree(bp->b_data, len);
980 				bp->b_data = bp->b_saveaddr;
981 				bp->b_saveaddr = 0;
982 			}
983 		}
984 	}
985 
986 	return error;
987 }
988 
989 void
990 vunmapbuf(struct buf *bp, vsize_t len)
991 {
992 
993 	/* remote case */
994 	if (!RUMP_LOCALPROC_P(bp->b_proc)) {
995 		if (BUF_ISREAD(bp)) {
996 			bp->b_error = copyout_proc(bp->b_proc,
997 			    bp->b_data, bp->b_saveaddr, len);
998 		}
999 		rump_hyperfree(bp->b_data, len);
1000 	}
1001 
1002 	bp->b_data = bp->b_saveaddr;
1003 	bp->b_saveaddr = 0;
1004 }
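
/*
 * Rough remote-case sequence for a physio-style transfer:
 *
 *	vmapbuf(bp, len);	(b_data -> hypercall buffer, copyin if write)
 *	(driver does its I/O on bp->b_data)
 *	vunmapbuf(bp, len);	(copyout if read, free buffer, restore b_data)
 */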
1005 
1006 void
1007 uvmspace_addref(struct vmspace *vm)
1008 {
1009 
1010 	/*
1011 	 * No dynamically allocated vmspaces exist.
1012 	 */
1013 }
1014 
1015 void
1016 uvmspace_free(struct vmspace *vm)
1017 {
1018 
1019 	/* nothing for now */
1020 }
1021 
1022 /*
1023  * page life cycle stuff.  it really doesn't exist, so just stubs.
1024  */
1025 
1026 void
1027 uvm_pageactivate(struct vm_page *pg)
1028 {
1029 
1030 	/* nada */
1031 }
1032 
1033 void
1034 uvm_pagedeactivate(struct vm_page *pg)
1035 {
1036 
1037 	/* nada */
1038 }
1039 
1040 void
1041 uvm_pagedequeue(struct vm_page *pg)
1042 {
1043 
1044 	/* nada */
1045 }
1046 
1047 void
1048 uvm_pageenqueue(struct vm_page *pg)
1049 {
1050 
1051 	/* nada */
1052 }
1053 
1054 void
1055 uvmpdpol_anfree(struct vm_anon *an)
1056 {
1057 
1058 	/* nada */
1059 }
1060 
1061 /*
1062  * Physical address accessors.
1063  */
1064 
1065 struct vm_page *
1066 uvm_phys_to_vm_page(paddr_t pa)
1067 {
1068 
1069 	return NULL;
1070 }
1071 
1072 paddr_t
1073 uvm_vm_page_to_phys(const struct vm_page *pg)
1074 {
1075 
1076 	return 0;
1077 }
1078 
1079 vaddr_t
1080 uvm_uarea_alloc(void)
1081 {
1082 
1083 	/* non-zero */
1084 	return (vaddr_t)11;
1085 }
1086 
1087 void
1088 uvm_uarea_free(vaddr_t uarea)
1089 {
1090 
1091 	/* nata, so creamy */
1092 }
1093 
1094 /*
1095  * Routines related to the Page Baroness.
1096  */
1097 
1098 void
1099 uvm_wait(const char *msg)
1100 {
1101 
1102 	if (__predict_false(rump_threads == 0))
1103 		panic("pagedaemon missing (RUMP_THREADS = 0)");
1104 
1105 	if (curlwp == uvm.pagedaemon_lwp) {
1106 		/* is it possible for us to later get memory? */
1107 		if (!uvmexp.paging)
1108 			panic("pagedaemon out of memory");
1109 	}
1110 
1111 	mutex_enter(&pdaemonmtx);
1112 	pdaemon_waiters++;
1113 	cv_signal(&pdaemoncv);
1114 	cv_wait(&oomwait, &pdaemonmtx);
1115 	mutex_exit(&pdaemonmtx);
1116 }
1117 
1118 void
1119 uvm_pageout_start(int npages)
1120 {
1121 
1122 	mutex_enter(&pdaemonmtx);
1123 	uvmexp.paging += npages;
1124 	mutex_exit(&pdaemonmtx);
1125 }
1126 
1127 void
1128 uvm_pageout_done(int npages)
1129 {
1130 
1131 	if (!npages)
1132 		return;
1133 
1134 	mutex_enter(&pdaemonmtx);
1135 	KASSERT(uvmexp.paging >= npages);
1136 	uvmexp.paging -= npages;
1137 
1138 	if (pdaemon_waiters) {
1139 		pdaemon_waiters = 0;
1140 		cv_broadcast(&oomwait);
1141 	}
1142 	mutex_exit(&pdaemonmtx);
1143 }
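
/*
 * Asynchronous pageout is bracketed by these two, e.g. from a pager
 * and its I/O completion path:
 *
 *	uvm_pageout_start(npages);
 *	(start writing npages pages to backing store)
 *	...
 *	uvm_pageout_done(npages);	(wakes up threads in uvm_wait())
 */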
1144 
1145 static bool
1146 processpage(struct vm_page *pg)
1147 {
1148 	struct uvm_object *uobj;
1149 
1150 	uobj = pg->uobject;
1151 	if (rw_tryenter(uobj->vmobjlock, RW_WRITER)) {
1152 		if ((pg->flags & PG_BUSY) == 0) {
1153 			mutex_exit(&vmpage_lruqueue_lock);
1154 			uobj->pgops->pgo_put(uobj, pg->offset,
1155 			    pg->offset + PAGE_SIZE,
1156 			    PGO_CLEANIT|PGO_FREE);
1157 			KASSERT(!rw_write_held(uobj->vmobjlock));
1158 			return true;
1159 		} else {
1160 			rw_exit(uobj->vmobjlock);
1161 		}
1162 	}
1163 
1164 	return false;
1165 }
1166 
1167 /*
1168  * The Diabolical pageDaemon Director (DDD).
1169  *
1170  * This routine can always use better heuristics.
1171  */
1172 void
1173 uvm_pageout(void *arg)
1174 {
1175 	struct vm_page *pg;
1176 	struct pool *pp, *pp_first;
1177 	int cleaned, skip, skipped;
1178 	bool succ;
1179 
1180 	mutex_enter(&pdaemonmtx);
1181 	for (;;) {
1182 		if (pdaemon_waiters) {
1183 			pdaemon_waiters = 0;
1184 			cv_broadcast(&oomwait);
1185 		}
1186 		if (!NEED_PAGEDAEMON()) {
1187 			kernel_map->flags &= ~VM_MAP_WANTVA;
1188 			cv_wait(&pdaemoncv, &pdaemonmtx);
1189 		}
1190 		uvmexp.pdwoke++;
1191 
1192 		/* tell the world that we are hungry */
1193 		kernel_map->flags |= VM_MAP_WANTVA;
1194 		mutex_exit(&pdaemonmtx);
1195 
1196 		/*
1197 		 * step one: reclaim the page cache.  this should give
1198 		 * us the biggest earnings since whole pages are released
1199 		 * into backing memory.
1200 		 */
1201 		pool_cache_reclaim(&pagecache);
1202 		if (!NEED_PAGEDAEMON()) {
1203 			mutex_enter(&pdaemonmtx);
1204 			continue;
1205 		}
1206 
1207 		/*
1208 		 * Ok, so that didn't help.  Next, try to hunt memory
1209 		 * by pushing out vnode pages.  The pages might contain
1210 		 * useful cached data, but we need the memory.
1211 		 */
1212 		cleaned = 0;
1213 		skip = 0;
1214  again:
1215 		mutex_enter(&vmpage_lruqueue_lock);
1216 		while (cleaned < PAGEDAEMON_OBJCHUNK) {
1217 			skipped = 0;
1218 			TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {
1219 
1220 				/*
1221 				 * skip over pages we _might_ have tried
1222 				 * to handle earlier.  they might not be
1223 				 * exactly the same ones, but I'm not too
1224 				 * concerned.
1225 				 */
1226 				while (skipped++ < skip)
1227 					continue;
1228 
1229 				if (processpage(pg)) {
1230 					cleaned++;
1231 					goto again;
1232 				}
1233 
1234 				skip++;
1235 			}
1236 			break;
1237 		}
1238 		mutex_exit(&vmpage_lruqueue_lock);
1239 
1240 		/*
1241 		 * And of course we need to reclaim the page cache
1242 		 * again to actually release memory.
1243 		 */
1244 		pool_cache_reclaim(&pagecache);
1245 		if (!NEED_PAGEDAEMON()) {
1246 			mutex_enter(&pdaemonmtx);
1247 			continue;
1248 		}
1249 
1250 		/*
1251 		 * And then drain the pools.  Wipe them out ... all of them.
1252 		 */
1253 		for (pp_first = NULL;;) {
1254 			rump_vfs_drainbufs(10 /* XXX: estimate! */);
1255 
1256 			succ = pool_drain(&pp);
1257 			if (succ || pp == pp_first)
1258 				break;
1259 
1260 			if (pp_first == NULL)
1261 				pp_first = pp;
1262 		}
1263 
1264 		/*
1265 		 * Need to use PYEC on our bag of tricks.
1266 		 * Unfortunately, the wife just borrowed it.
1267 		 */
1268 
1269 		mutex_enter(&pdaemonmtx);
1270 		if (!succ && cleaned == 0 && pdaemon_waiters &&
1271 		    uvmexp.paging == 0) {
1272 			kpause("pddlk", false, hz, &pdaemonmtx);
1273 		}
1274 	}
1275 
1276 	panic("you can swap out any time you like, but you can never leave");
1277 }
1278 
1279 void
1280 uvm_kick_pdaemon()
1281 {
1282 
1283 	/*
1284 	 * Wake up the diabolical pagedaemon director if we are over
1285 	 * 90% of the memory limit.  This is a complete and utter
1286 	 * stetson-harrison decision which you are allowed to finetune.
1287 	 * Don't bother locking.  If we have some unflushed caches,
1288 	 * other waker-uppers will deal with the issue.
1289 	 */
1290 	if (NEED_PAGEDAEMON()) {
1291 		cv_signal(&pdaemoncv);
1292 	}
1293 }
1294 
1295 void *
1296 rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
1297 {
1298 	const unsigned long thelimit =
1299 	    curlwp == uvm.pagedaemon_lwp ? pdlimit : rump_physmemlimit;
1300 	unsigned long newmem;
1301 	void *rv;
1302 	int error;
1303 
1304 	uvm_kick_pdaemon(); /* ouch */
1305 
1306 	/* first we must be within the limit */
1307  limitagain:
1308 	if (thelimit != RUMPMEM_UNLIMITED) {
1309 		newmem = atomic_add_long_nv(&curphysmem, howmuch);
1310 		if (newmem > thelimit) {
1311 			newmem = atomic_add_long_nv(&curphysmem, -howmuch);
1312 			if (!waitok) {
1313 				return NULL;
1314 			}
1315 			uvm_wait(wmsg);
1316 			goto limitagain;
1317 		}
1318 	}
1319 
1320 	/* second, we must get something from the backend */
1321  again:
1322 	error = rumpuser_malloc(howmuch, alignment, &rv);
1323 	if (__predict_false(error && waitok)) {
1324 		uvm_wait(wmsg);
1325 		goto again;
1326 	}
1327 
1328 	return rv;
1329 }
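
/*
 * Worked example of the limit accounting: with a 1MB limit and
 * curphysmem at 1MB-4kB, an 8kB request briefly pushes the counter
 * to 1MB+4kB, the addition is backed out, and the caller either
 * gets NULL (!waitok) or sleeps in uvm_wait() until rump_hyperfree()
 * drops the counter again.
 */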
1330 
1331 void
1332 rump_hyperfree(void *what, size_t size)
1333 {
1334 
1335 	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
1336 		atomic_add_long(&curphysmem, -size);
1337 	}
1338 	rumpuser_free(what, size);
1339 }
1340 
1341 /*
1342  * UBC
1343  */
1344 
1345 #define PAGERFLAGS (PGO_SYNCIO | PGO_NOBLOCKALLOC | PGO_NOTIMESTAMP)
1346 
1347 void
1348 ubc_zerorange(struct uvm_object *uobj, off_t off, size_t len, int flags)
1349 {
1350 	struct vm_page **pgs;
1351 	int maxpages = MIN(32, round_page(len) >> PAGE_SHIFT);
1352 	int npages, i;
1353 
1354 	if (maxpages == 0)
1355 		return;
1356 
1357 	pgs = kmem_alloc(maxpages * sizeof(pgs), KM_SLEEP);
1358 	rw_enter(uobj->vmobjlock, RW_WRITER);
1359 	while (len) {
1360 		npages = MIN(maxpages, round_page(len) >> PAGE_SHIFT);
1361 		memset(pgs, 0, npages * sizeof(struct vm_page *));
1362 		(void)uobj->pgops->pgo_get(uobj, trunc_page(off),
1363 		    pgs, &npages, 0, VM_PROT_READ | VM_PROT_WRITE,
1364 		    0, PAGERFLAGS | PGO_PASTEOF);
1365 		KASSERT(npages > 0);
1366 
1367 		rw_enter(uobj->vmobjlock, RW_WRITER);
1368 		for (i = 0; i < npages; i++) {
1369 			struct vm_page *pg;
1370 			uint8_t *start;
1371 			size_t chunkoff, chunklen;
1372 
1373 			pg = pgs[i];
1374 			if (pg == NULL)
1375 				break;
1376 
1377 			KASSERT(pg->uobject != NULL);
1378 			KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
1379 
1380 			chunkoff = off & PAGE_MASK;
1381 			chunklen = MIN(PAGE_SIZE - chunkoff, len);
1382 			start = (uint8_t *)pg->uanon + chunkoff;
1383 
1384 			memset(start, 0, chunklen);
1385 			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
1386 
1387 			off += chunklen;
1388 			len -= chunklen;
1389 		}
1390 		uvm_page_unbusy(pgs, npages);
1391 	}
1392 	rw_exit(uobj->vmobjlock);
1393 	kmem_free(pgs, maxpages * sizeof(pgs));
1394 }
1395 
1396 #define len2npages(off, len)						\
1397     ((round_page(off+len) - trunc_page(off)) >> PAGE_SHIFT)
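
/*
 * E.g. with 4kB pages, off = 0x1234 and len = 0x3000:
 * round_page(0x4234) = 0x5000 and trunc_page(0x1234) = 0x1000,
 * so len2npages() yields 4 pages.
 */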
1398 
1399 int
1400 ubc_uiomove(struct uvm_object *uobj, struct uio *uio, vsize_t todo,
1401 	int advice, int flags)
1402 {
1403 	struct vm_page **pgs;
1404 	int npages = len2npages(uio->uio_offset, todo);
1405 	size_t pgalloc;
1406 	int i, rv, pagerflags;
1407 	vm_prot_t prot;
1408 
1409 	pgalloc = npages * sizeof(pgs);
1410 	pgs = kmem_alloc(pgalloc, KM_SLEEP);
1411 
1412 	pagerflags = PAGERFLAGS;
1413 	if (flags & UBC_WRITE)
1414 		pagerflags |= PGO_PASTEOF;
1415 	if (flags & UBC_FAULTBUSY)
1416 		pagerflags |= PGO_OVERWRITE;
1417 
1418 	prot = VM_PROT_READ;
1419 	if (flags & UBC_WRITE)
1420 		prot |= VM_PROT_WRITE;
1421 
1422 	rw_enter(uobj->vmobjlock, RW_WRITER);
1423 	do {
1424 		npages = len2npages(uio->uio_offset, todo);
1425 		memset(pgs, 0, pgalloc);
1426 		rv = uobj->pgops->pgo_get(uobj, trunc_page(uio->uio_offset),
1427 		    pgs, &npages, 0, prot, 0, pagerflags);
1428 		if (rv)
1429 			goto out;
1430 
1431 		rw_enter(uobj->vmobjlock, RW_WRITER);
1432 		for (i = 0; i < npages; i++) {
1433 			struct vm_page *pg;
1434 			size_t xfersize;
1435 			off_t pageoff;
1436 
1437 			pg = pgs[i];
1438 			if (pg == NULL)
1439 				break;
1440 
1441 			KASSERT(pg->uobject != NULL);
1442 			KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
1443 			pageoff = uio->uio_offset & PAGE_MASK;
1444 
1445 			xfersize = MIN(MIN(todo, PAGE_SIZE), PAGE_SIZE-pageoff);
1446 			KASSERT(xfersize > 0);
1447 			rv = uiomove((uint8_t *)pg->uanon + pageoff,
1448 			    xfersize, uio);
1449 			if (rv) {
1450 				uvm_page_unbusy(pgs, npages);
1451 				rw_exit(uobj->vmobjlock);
1452 				goto out;
1453 			}
1454 			if (uio->uio_rw == UIO_WRITE) {
1455 				pg->flags &= ~PG_FAKE;
1456 				uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
1457 			}
1458 			todo -= xfersize;
1459 		}
1460 		uvm_page_unbusy(pgs, npages);
1461 	} while (todo);
1462 	rw_exit(uobj->vmobjlock);
1463 
1464  out:
1465 	kmem_free(pgs, pgalloc);
1466 	return rv;
1467 }
1468