/*	$NetBSD: uvm_glue.c,v 1.16 1999/03/15 07:55:19 chs Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "opt_uvmhist.h"
#include "opt_sysv.h"

/*
 * uvm_glue.c: glue functions
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

#include <machine/cpu.h>

/*
 * local prototypes
 */

static void uvm_swapout __P((struct proc *));

/*
 * XXXCDC: do these really belong here?
 */

unsigned maxdmap = MAXDSIZ;	/* kern_resource.c: RLIMIT_DATA max */
unsigned maxsmap = MAXSSIZ;	/* kern_resource.c: RLIMIT_STACK max */

int readbuffers = 0;		/* allow KGDB to read kern buffer pool */
				/* XXX: see uvm_kernacc */


/*
 * uvm_kernacc: can the kernel access a region of memory
 *
 * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
 */

boolean_t
uvm_kernacc(addr, len, rw)
	caddr_t addr;
	size_t len;
	int rw;
{
	boolean_t rv;
	vaddr_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len);
	vm_map_lock_read(kernel_map);
	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
	vm_map_unlock_read(kernel_map);

	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM thinks there is.  This can
	 * lead to bogus allocation of pages in the kernel address space
	 * or worse, inconsistencies at the pmap level.  We only worry
	 * about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vaddr_t)buffers &&
			     saddr < (vaddr_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv);
}

/*
 * uvm_useracc: can the user access it?
 *
 * - called from physio() and sys___sysctl().
 */

boolean_t
uvm_useracc(addr, len, rw)
	caddr_t addr;
	size_t len;
	int rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

#if defined(i386) || defined(pc532)
	/*
	 * XXX - specially disallow access to user page tables - they are
	 * in the map.  This is here until i386 & pc532 pmaps are fixed...
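	 * The third check below also rejects ranges whose length wraps
	 * past the end of the address space.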
	 */
	if ((vaddr_t) addr >= VM_MAXUSER_ADDRESS
	    || (vaddr_t) addr + len > VM_MAXUSER_ADDRESS
	    || (vaddr_t) addr + len <= (vaddr_t) addr)
		return (FALSE);
#endif

	rv = uvm_map_checkprot(&curproc->p_vmspace->vm_map,
			trunc_page(addr), round_page(addr+len), prot);
	return(rv);
}

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were
 * to use vm_map_protect, a change to allow writing would be lazily
 * applied, meaning we would still take a protection fault, something
 * we really don't want to do.  It would also fragment the kernel
 * map unnecessarily.  We cannot use pmap_protect since it also won't
 * enforce a write-enable request.  Using pmap_enter is the only way
 * we can ensure the change takes place properly.
 */
void
uvm_chgkprot(addr, len, rw)
	register caddr_t addr;
	size_t len;
	int rw;
{
	vm_prot_t prot;
	paddr_t pa;
	vaddr_t sva, eva;

	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
	eva = round_page(addr + len);
	for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
		/*
		 * Extract physical address for the page.
		 * We use a cheezy hack to differentiate physical
		 * page 0 from an invalid mapping, not that it
		 * really matters...
		 */
		pa = pmap_extract(pmap_kernel(), sva|1);
		if (pa == 0)
			panic("chgkprot: invalid page");
		pmap_enter(pmap_kernel(), sva, pa&~1, prot, TRUE);
	}
}
#endif

/*
 * uvm_vslock: wire user memory for I/O
 *
 * - called from physio and sys___sysctl
 * - XXXCDC: consider nuking this (or making it a macro?)
 */

void
uvm_vslock(p, addr, len)
	struct proc *p;
	caddr_t	addr;
	size_t	len;
{
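	/*
	 * Fault the pages in (if necessary) and wire them so they stay
	 * resident for the duration of the I/O.
	 */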
	uvm_fault_wire(&p->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr+len));
}

/*
 * uvm_vsunlock: unwire user memory previously wired by uvm_vslock
 *
 * - called from physio and sys___sysctl
 * - XXXCDC: consider nuking this (or making it a macro?)
 */

void
uvm_vsunlock(p, addr, len)
	struct proc *p;
	caddr_t	addr;
	size_t	len;
{
	uvm_fault_unwire(p->p_vmspace->vm_map.pmap, trunc_page(addr),
		round_page(addr+len));
}

/*
 * uvm_fork: fork a virtual address space
 *
 * - the address space is copied as per parent map's inherit values
 * - a new "user" structure is allocated for the child process
 *	[filled in by MD layer...]
 * - NOTE: the kernel stack may be at a different location in the child
 *	process, and thus addresses of automatic variables may be invalid
 *	after cpu_fork returns in the child process.  We do nothing here
 *	after cpu_fork returns.
 * - XXXCDC: we need a way for this to return a failure value rather
 *   than just hang
 */
void
uvm_fork(p1, p2, shared)
	struct proc *p1, *p2;
	boolean_t shared;
{
	struct user *up = p2->p_addr;
	int rv;

	if (shared == TRUE)
		uvmspace_share(p1, p2);			/* share vmspace */
	else
		p2->p_vmspace = uvmspace_fork(p1->p_vmspace); /* fork vmspace */

	/*
	 * Wire down the U-area for the process, which contains the PCB
	 * and the kernel stack.  Wired state is stored in p->p_flag's
	 * P_INMEM bit rather than in the vm_map_entry's wired count
	 * to prevent kernel_map fragmentation.
	 */
	rv = uvm_fault_wire(kernel_map, (vaddr_t)up,
	    (vaddr_t)up + USPACE);
	if (rv != KERN_SUCCESS)
		panic("uvm_fork: uvm_fault_wire failed: %d", rv);

	/*
	 * p_stats and p_sigacts currently point at fields in the user
	 * struct but not at &u, instead at p_addr.  Copy p_sigacts and
	 * parts of p_stats; zero the rest of p_stats (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	memset(&up->u_stats.pstat_startzero, 0,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
		(caddr_t)&up->u_stats.pstat_startzero));
	memcpy(&up->u_stats.pstat_startcopy, &p1->p_stats->pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
		(caddr_t)&up->u_stats.pstat_startcopy));

	/*
	 * cpu_fork will copy and update the kernel stack and pcb, and make
	 * the child ready to run.  The child will exit directly to user
	 * mode on its first time slice, and will not return here.
	 */
	cpu_fork(p1, p2);
}

/*
 * uvm_exit: exit a virtual address space
 *
 * - the process passed to us is a dead (pre-zombie) process; we
 *   are running on a different context now (the reaper).
 * - we must run in a separate thread because freeing the vmspace
 *   of the dead process may block.
 */
void
uvm_exit(p)
	struct proc *p;
{

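	/*
	 * Free the dead process's address space, then give its wired
	 * u-area (user struct + kernel stack) back to kernel_map.
	 */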
	uvmspace_free(p->p_vmspace);
	uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE);
}

/*
 * uvm_init_limits: init per-process VM limits
 *
 * - called for process 0 and then inherited by all others.
 */
void
uvm_init_limits(p)
	struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.  Set the maximum
	 * resident set size to be all of (reasonably) available memory.
	 * This causes any single, large process to start random page
	 * replacement once it fills memory.
	 */

	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
}

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif

/*
 * uvm_swapin: swap in a process's u-area.
 */

void
uvm_swapin(p)
	struct proc *p;
{
	vaddr_t addr;
	int s;

	addr = (vaddr_t)p->p_addr;
	/* make P_INMEM true */
	uvm_fault_wire(kernel_map, addr, addr + USPACE);

	/*
	 * Some architectures need to be notified when the user area has
	 * moved to new physical page(s) (e.g. see mips/mips/vm_machdep.c).
	 */
	cpu_swapin(p);
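
	/*
	 * Raise the priority level so that clock-driven scheduling code
	 * never sees the process half-updated while we mark it resident
	 * and put it back on the run queue.
	 */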
	s = splstatclock();
	if (p->p_stat == SRUN)
		setrunqueue(p);
	p->p_flag |= P_INMEM;
	splx(s);
	p->p_swtime = 0;
	++uvmexp.swapins;
}

/*
 * uvm_scheduler: process zero main loop
 *
 * - attempt to swap in every swapped-out, runnable process in order of
 *	priority.
 * - if not enough memory, wake the pagedaemon and let it clear space.
 */

void
uvm_scheduler()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	UVMHIST_FUNC("uvm_scheduler"); UVMHIST_CALLED(maphist);

loop:
#ifdef DEBUG
	while (!enableswap)
		tsleep((caddr_t)&proc0, PVM, "noswap", 0);
#endif
	pp = NULL;		/* process to choose */
	ppri = INT_MIN;		/* its priority */
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {

		/* is it a runnable swapped out process? */
		if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
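			/*
			 * The longer the process has been swapped out and
			 * the longer it has been sleeping, the sooner we
			 * bring it back in; nicer processes wait longer.
			 */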
			pri = p->p_swtime + p->p_slptime -
			    (p->p_nice - NZERO) * 8;
			if (pri > ppri) {   /* higher priority?  remember it. */
				pp = p;
				ppri = pri;
			}
		}
	}

#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("scheduler: running, procp %p pri %d\n", pp, ppri);
#endif
	/*
	 * Nothing to do, back to sleep
	 */
	if ((p = pp) == NULL) {
		tsleep((caddr_t)&proc0, PVM, "scheduler", 0);
		goto loop;
	}

	/*
	 * we have found a swapped-out process which we would like to
	 * bring back in.
	 *
	 * XXX: this part is really bogus because we could deadlock on
	 * memory despite our feeble check
	 */
	if (uvmexp.free > atop(USPACE)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%p, pri %d free %d\n",
			    p->p_pid, p->p_comm, p->p_addr, ppri, uvmexp.free);
#endif
		uvm_swapin(p);
		goto loop;
	}
	/*
	 * not enough memory, jab the pageout daemon and wait until the
	 * coast is clear
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("scheduler: no room for pid %d(%s), free %d\n",
		    p->p_pid, p->p_comm, uvmexp.free);
#endif
	(void) splhigh();
	uvm_wait("schedpwait");
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("scheduler: room again, free %d\n", uvmexp.free);
#endif
	goto loop;
}

/*
 * swappable: is process "p" swappable?
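 *
 * - the process must be resident (P_INMEM), must not be a system
 *   process or on its way out (P_SYSTEM, P_WEXIT), and must not have
 *   its u-area held down (p_holdcnt != 0).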
 */

#define	swappable(p)							\
	(((p)->p_flag & (P_SYSTEM | P_INMEM | P_WEXIT)) == P_INMEM &&	\
	 (p)->p_holdcnt == 0)

/*
 * swapout_threads: find threads that can be swapped and unwire their
 *	u-areas.
 *
 * - called by the pagedaemon
 * - try to swap out at least one process
 * - processes that have been sleeping or stopped for maxslp or more
 *   seconds are swapped out... otherwise the longest-sleeping or
 *   stopped process is swapped out, otherwise the longest-resident
 *   process...
 */
void
uvm_swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;
	/* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */

#ifdef DEBUG
	if (!enableswap)
		return;
#endif

	/*
	 * outp/outpri  : stop/sleep process with largest sleeptime < maxslp
	 * outp2/outpri2: the longest resident process (its swap time)
	 */
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
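			/*
			 * Runnable but swappable: remember the one that has
			 * been resident the longest as a last-resort victim.
			 */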
			if (p->p_swtime > outpri2) {
				outp2 = p;
				outpri2 = p->p_swtime;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime >= maxslp) {
				uvm_swapout(p);			/* zap! */
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}

	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are really low on memory since we don't gain much by
	 * doing it (USPACE bytes).
	 */
	if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE))) {
		if ((p = outp) == NULL)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %p\n", p);
#endif
		if (p)
			uvm_swapout(p);
	}
}

/*
 * uvm_swapout: swap out process "p"
 *
 * - currently "swapout" means "unwire U-area" and "pmap_collect()"
 *   the pmap.
 * - XXXCDC: should deactivate all of the process's private anonymous
 *   memory
 */

static void
uvm_swapout(p)
	register struct proc *p;
{
	vaddr_t addr;
	int s;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%p, stat %x slp %d free %d\n",
		    p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		    p->p_slptime, uvmexp.free);
#endif

	/*
	 * Do any machine-specific actions necessary before swapout.
	 * This can include saving floating point state, etc.
	 */
	cpu_swapout(p);

	/*
	 * Unwire the to-be-swapped process's user struct and kernel stack.
	 */
	addr = (vaddr_t)p->p_addr;
	uvm_fault_unwire(kernel_map->pmap, addr, addr + USPACE); /* !P_INMEM */
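	/*
	 * Give the pmap a chance to reclaim resources (e.g. page table
	 * pages) now that the process will not run for a while.
	 */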
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));

	/*
	 * Mark it as (potentially) swapped out.
	 */
	s = splstatclock();
	p->p_flag &= ~P_INMEM;
	if (p->p_stat == SRUN)
		remrunqueue(p);
	splx(s);
	p->p_swtime = 0;
	++uvmexp.swapouts;
}