/*	$OpenBSD: uvm_glue.c,v 1.66 2014/07/11 16:35:40 jsg Exp $	*/
/*	$NetBSD: uvm_glue.c,v 1.44 2001/02/06 19:54:44 eeh Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_glue.c: glue functions
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif
#include <sys/sched.h>

#include <uvm/uvm.h>

/*
 * uvm_kernacc: can the kernel access a region of memory
 *
 * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
 */
boolean_t
uvm_kernacc(caddr_t addr, size_t len, int rw)
{
	boolean_t rv;
	vaddr_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page((vaddr_t)addr);
	eaddr = round_page((vaddr_t)addr + len);
	vm_map_lock_read(kernel_map);
	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
	vm_map_unlock_read(kernel_map);

	return(rv);
}
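
/*
 * Example (hypothetical sketch, not part of this file): a /dev/kmem-style
 * read path would check uvm_kernacc() before copying kernel memory out to
 * the caller.  The helper name and the uiomove() usage are illustrative
 * assumptions, not code from this source tree.
 */
#if 0
int
example_kmem_read(caddr_t v, size_t c, struct uio *uio)
{
	/* refuse the transfer if the kernel cannot read the region */
	if (!uvm_kernacc(v, c, B_READ))
		return (EFAULT);
	/* copy the kernel bytes out to the caller's buffer */
	return (uiomove(v, c, uio));
}
#endif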

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so a debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were
 * to use vm_map_protect(), a change to allow writing would be lazily
 * applied, meaning we would still take a protection fault, something
 * we really don't want to do.  It would also fragment the kernel
 * map unnecessarily.  We cannot use pmap_protect() since it also won't
 * enforce a write-enable request.  Using pmap_enter() is the only way
 * we can ensure the change takes place properly.
 */
void
uvm_chgkprot(caddr_t addr, size_t len, int rw)
{
	vm_prot_t prot;
	paddr_t pa;
	vaddr_t sva, eva;

	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
	eva = round_page((vaddr_t)addr + len);
	for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) {
		/*
		 * Extract the physical address for the page.
		 * We use a cheesy hack to differentiate physical
		 * page 0 from an invalid mapping, not that it
		 * really matters...
		 */
		if (pmap_extract(pmap_kernel(), sva, &pa) == FALSE)
			panic("chgkprot: invalid page");
		pmap_enter(pmap_kernel(), sva, pa, prot, PMAP_WIRED);
	}
	pmap_update(pmap_kernel());
}
#endif
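
/*
 * Example (hypothetical sketch, not part of this file): a KGDB-style
 * breakpoint write would temporarily open the text page for writing,
 * patch in the breakpoint opcode, then restore the original protection.
 * The function name and the bkpt buffer are illustrative assumptions.
 */
#if 0
void
example_plant_breakpoint(caddr_t va, const void *bkpt, size_t bkptlen)
{
	/* make the instruction page writable at the pmap level */
	uvm_chgkprot(va, bkptlen, B_WRITE);
	memcpy(va, bkpt, bkptlen);	/* patch in the breakpoint opcode */
	/* restore read-only protection */
	uvm_chgkprot(va, bkptlen, B_READ);
}
#endif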

/*
 * uvm_vslock: wire user memory for I/O
 *
 * - called from physio and sys___sysctl
 */

int
uvm_vslock(struct proc *p, caddr_t addr, size_t len, vm_prot_t access_type)
{
	struct vm_map *map;
	vaddr_t start, end;
	int rv;

	map = &p->p_vmspace->vm_map;
	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	if (end <= start)
		return (EINVAL);

	rv = uvm_fault_wire(map, start, end, access_type);

	return (rv);
}

/*
 * uvm_vsunlock: unwire user memory wired by uvm_vslock()
 *
 * - called from physio and sys___sysctl
 */

void
uvm_vsunlock(struct proc *p, caddr_t addr, size_t len)
{
	vaddr_t start, end;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	if (end <= start)
		return;

	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
}
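
/*
 * Example (hypothetical sketch, not part of this file): physio-style
 * callers bracket a transfer that touches user memory directly with
 * uvm_vslock()/uvm_vsunlock().  The helper name and the elided transfer
 * step are illustrative assumptions.
 */
#if 0
int
example_user_io(struct proc *p, caddr_t uaddr, size_t len)
{
	int error;

	/* wire the user pages so they cannot be paged out during the I/O */
	error = uvm_vslock(p, uaddr, len, VM_PROT_READ | VM_PROT_WRITE);
	if (error)
		return (error);

	/* ... start the transfer and wait for it to complete ... */

	/* unwire the pages once the device is done with them */
	uvm_vsunlock(p, uaddr, len);
	return (0);
}
#endif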

/*
 * uvm_vslock_device: wire user memory and make sure it is device (DMA)
 * reachable, bouncing it through a kernel buffer if any page is not.
 * On success, *retp is the bounce buffer, or NULL if no bounce was needed.
 */
int
uvm_vslock_device(struct proc *p, void *addr, size_t len,
    vm_prot_t access_type, void **retp)
{
	struct vm_page *pg;
	struct pglist pgl;
	int npages;
	vaddr_t start, end, off;
	vaddr_t sva, va;
	vsize_t sz;
	int error, i;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	sz = end - start;
	off = (vaddr_t)addr - start;
	if (end <= start)
		return (EINVAL);

	if ((error = uvm_fault_wire(&p->p_vmspace->vm_map, start, end,
	    access_type))) {
		return (error);
	}

	npages = atop(sz);
	for (i = 0; i < npages; i++) {
		paddr_t pa;

		if (!pmap_extract(p->p_vmspace->vm_map.pmap,
		    start + ptoa(i), &pa)) {
			error = EFAULT;
			goto out_unwire;
		}
		if (!PADDR_IS_DMA_REACHABLE(pa))
			break;
	}
	if (i == npages) {
		*retp = NULL;
		return (0);
	}

	if ((va = uvm_km_valloc(kernel_map, sz)) == 0) {
		error = ENOMEM;
		goto out_unwire;
	}
	sva = va;

	TAILQ_INIT(&pgl);
	error = uvm_pglistalloc(npages * PAGE_SIZE, dma_constraint.ucr_low,
	    dma_constraint.ucr_high, 0, 0, &pgl, npages, UVM_PLA_WAITOK);
	if (error)
		goto out_unmap;

	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
		TAILQ_REMOVE(&pgl, pg, pageq);
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ|VM_PROT_WRITE);
		va += PAGE_SIZE;
	}
	pmap_update(pmap_kernel());
	KASSERT(va == sva + sz);
	*retp = (void *)(sva + off);

	if ((error = copyin(addr, *retp, len)) == 0)
		return 0;

	uvm_km_pgremove_intrsafe(sva, sva + sz);
	pmap_kremove(sva, sz);
	pmap_update(pmap_kernel());
out_unmap:
	uvm_km_free(kernel_map, sva, sz);
out_unwire:
	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);
	return (error);
}

/*
 * uvm_vsunlock_device: unwire user memory wired by uvm_vslock_device(),
 * copying back from and freeing the bounce buffer if one was set up.
 */
void
uvm_vsunlock_device(struct proc *p, void *addr, size_t len, void *map)
{
	vaddr_t start, end;
	vaddr_t kva;
	vsize_t sz;

	start = trunc_page((vaddr_t)addr);
	end = round_page((vaddr_t)addr + len);
	sz = end - start;
	if (end <= start)
		return;

	if (map)
		copyout(map, addr, len);
	uvm_fault_unwire(&p->p_vmspace->vm_map, start, end);

	if (!map)
		return;

	kva = trunc_page((vaddr_t)map);
	uvm_km_pgremove_intrsafe(kva, kva + sz);
	pmap_kremove(kva, sz);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, kva, sz);
}
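
/*
 * Example (hypothetical sketch, not part of this file): a driver that
 * needs the user buffer to be DMA reachable pairs the _device variants
 * and works through the pointer returned in retp, which is either NULL
 * (the user buffer was already reachable) or a kernel bounce buffer.
 * The helper name and the elided device programming are assumptions.
 */
#if 0
int
example_device_io(struct proc *p, void *uaddr, size_t len)
{
	void *bounce = NULL;
	int error;

	error = uvm_vslock_device(p, uaddr, len,
	    VM_PROT_READ | VM_PROT_WRITE, &bounce);
	if (error)
		return (error);

	/*
	 * ... hand either the bounce buffer (if bounce != NULL) or the
	 * original user buffer to the device and wait for completion ...
	 */

	/* copies the bounce buffer back (if any) and unwires the pages */
	uvm_vsunlock_device(p, uaddr, len, bounce);
	return (0);
}
#endif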

/*
 * uvm_uarea_alloc: allocate the u-area for a new thread
 */
vaddr_t
uvm_uarea_alloc(void)
{
	vaddr_t uaddr;

	uaddr = uvm_km_kmemalloc_pla(kernel_map, uvm.kernel_object, USPACE,
	    USPACE_ALIGN, UVM_KMF_ZERO,
	    no_constraint.ucr_low, no_constraint.ucr_high,
	    0, 0, USPACE/PAGE_SIZE);

#ifdef PMAP_UAREA
	/* Tell the pmap this is a u-area mapping */
	if (uaddr != 0)
		PMAP_UAREA(uaddr);
#endif

	return (uaddr);
}

/*
 * uvm_uarea_free: free a dead thread's u-area (including its kernel stack)
 *
 * - the thread passed to us is dead; we are running in a
 *   different context now (the reaper).
 */
void
uvm_uarea_free(struct proc *p)
{
	uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE);
	p->p_addr = NULL;
}
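
/*
 * Example (hypothetical sketch, not part of this file): thread creation
 * allocates the u-area and records it in p_addr; once the thread has
 * exited, the reaper releases it again with uvm_uarea_free().  The helper
 * name is an illustrative assumption.
 */
#if 0
int
example_thread_setup(struct proc *p)
{
	vaddr_t uaddr;

	uaddr = uvm_uarea_alloc();
	if (uaddr == 0)
		return (ENOMEM);

	/* the u-area holds the PCB and kernel stack for the new thread */
	p->p_addr = (struct user *)uaddr;
	return (0);
}
#endif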

/*
 * uvm_exit: exit a virtual address space
 */
void
uvm_exit(struct process *pr)
{
	uvmspace_free(pr->ps_vmspace);
	pr->ps_vmspace = NULL;
}

/*
 * uvm_init_limits: init per-process VM limits
 *
 * - called for process 0 and then inherited by all others.
 */
void
uvm_init_limits(struct proc *p)
{

	/*
	 * Set up the initial limits on process VM.  Set the maximum
	 * resident set size to be all of (reasonably) available memory.
	 * This causes any single, large process to start random page
	 * replacement once it fills memory.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
}

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif


/*
 * uvm_swapout_threads: find threads that can be swapped
 *
 * - called by the pagedaemon
 * - try to swap at least one process
 * - processes that are sleeping or stopped for maxslp or more seconds
 *   are swapped... otherwise the longest-sleeping or stopped process
 *   is swapped, otherwise the longest-resident process...
 */
void
uvm_swapout_threads(void)
{
	struct process *pr;
	struct proc *p, *slpp;
	struct process *outpr;
	int outpri;
	int didswap = 0;
	extern int maxslp;
	/* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */

#ifdef DEBUG
	if (!enableswap)
		return;
#endif

	/*
	 * outpr/outpri  : stop/sleep process whose most active thread has
	 *	the largest sleeptime < maxslp
	 */
	outpr = NULL;
	outpri = 0;
	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (pr->ps_flags & (PS_SYSTEM | PS_EXITING))
			continue;

		/*
		 * slpp: the sleeping or stopped thread in pr with
		 * the smallest p_slptime
		 */
		slpp = NULL;
		TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
			switch (p->p_stat) {
			case SRUN:
			case SONPROC:
				goto next_process;

			case SSLEEP:
			case SSTOP:
				if (slpp == NULL ||
				    slpp->p_slptime < p->p_slptime)
					slpp = p;
				continue;
			}
		}

		if (slpp != NULL) {
			if (slpp->p_slptime >= maxslp) {
				pmap_collect(pr->ps_vmspace->vm_map.pmap);
				didswap++;
			} else if (slpp->p_slptime > outpri) {
				outpr = pr;
				outpri = slpp->p_slptime;
			}
		}
next_process:	;
	}

	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are really low on memory since we don't gain much by
	 * doing it.
	 */
	if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE)) &&
	    outpr != NULL) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procpr %p\n",
			    outpr);
#endif
		pmap_collect(outpr->ps_vmspace->vm_map.pmap);
	}
}

/*
 * uvm_atopg: convert a kernel virtual address back to its vm_page.
 */
struct vm_page *
uvm_atopg(vaddr_t kva)
{
	struct vm_page *pg;
	paddr_t pa;
	boolean_t rv;

	rv = pmap_extract(pmap_kernel(), kva, &pa);
	KASSERT(rv);
	pg = PHYS_TO_VM_PAGE(pa);
	KASSERT(pg != NULL);
	return (pg);
}
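
/*
 * Example (hypothetical sketch, not part of this file): code holding a
 * mapped kernel virtual address can recover the backing vm_page with
 * uvm_atopg() in order to inspect or update per-page state.  The helper
 * name is an illustrative assumption.
 */
#if 0
void
example_touch_page(vaddr_t kva)
{
	struct vm_page *pg;

	/* kva must be mapped in the kernel pmap; uvm_atopg() asserts this */
	pg = uvm_atopg(kva);

	/* ... inspect or update per-page state through pg ... */
}
#endif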

/*
 * uvm_pause: briefly drop and reacquire the kernel lock, yielding the
 * CPU if the scheduler has flagged that we should.
 */
void
uvm_pause(void)
{
	KERNEL_UNLOCK();
	KERNEL_LOCK();
	if (curcpu()->ci_schedstate.spc_schedflags & SPCF_SHOULDYIELD)
		preempt(NULL);
}