/*	$NetBSD: uvm_glue.c,v 1.16 1999/03/15 07:55:19 chs Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_glue.c	8.6 (Berkeley) 1/5/94
 * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include "opt_uvmhist.h"
#include "opt_sysv.h"

/*
 * uvm_glue.c: glue functions
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>
#ifdef SYSVSHM
#include <sys/shm.h>
#endif

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

#include <machine/cpu.h>

/*
 * local prototypes
 */

static void uvm_swapout __P((struct proc *));

/*
 * XXXCDC: do these really belong here?
 */

unsigned maxdmap = MAXDSIZ;	/* kern_resource.c: RLIMIT_DATA max */
unsigned maxsmap = MAXSSIZ;	/* kern_resource.c: RLIMIT_STACK max */

int readbuffers = 0;		/* allow KGDB to read kern buffer pool */
				/* XXX: see uvm_kernacc */


/*
 * uvm_kernacc: can the kernel access a region of memory
 *
 * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
 */

boolean_t
uvm_kernacc(addr, len, rw)
	caddr_t addr;
	size_t len;
	int rw;
{
	boolean_t rv;
	vaddr_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len);
	vm_map_lock_read(kernel_map);
	rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
	vm_map_unlock_read(kernel_map);

	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM thinks there is.  This can
	 * lead to bogus allocation of pages in the kernel address space
	 * or worse, inconsistencies at the pmap level.  We only worry
	 * about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vaddr_t)buffers &&
	    saddr < (vaddr_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv);
}

/*
 * uvm_useracc: can the user access it?
 *
 * - called from physio() and sys___sysctl().
 */

boolean_t
uvm_useracc(addr, len, rw)
	caddr_t addr;
	size_t len;
	int rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

#if defined(i386) || defined(pc532)
	/*
	 * XXX - specially disallow access to user page tables - they are
	 * in the map.  This is here until i386 & pc532 pmaps are fixed...
	 */
	if ((vaddr_t) addr >= VM_MAXUSER_ADDRESS
	    || (vaddr_t) addr + len > VM_MAXUSER_ADDRESS
	    || (vaddr_t) addr + len <= (vaddr_t) addr)
		return (FALSE);
#endif

	rv = uvm_map_checkprot(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len), prot);
	return(rv);
}
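
/*
 * Illustrative note (not part of the original source): both access
 * checks above round the request out to whole pages before consulting
 * the map, since protections are tracked per page.  Assuming a
 * hypothetical 4KB (0x1000-byte) page size:
 *
 *	addr = 0x12345, len = 0x100
 *	trunc_page(addr)     = 0x12000
 *	round_page(addr+len) = round_page(0x12445) = 0x13000
 *
 * so the single page [0x12000, 0x13000) is what gets checked; the
 * verdict covers bytes on that page the caller never asked about.
 */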

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were
 * to use vm_map_protect a change to allow writing would be lazily-
 * applied meaning we would still take a protection fault, something
 * we really don't want to do.  It would also fragment the kernel
 * map unnecessarily.  We cannot use pmap_protect since it also won't
 * enforce a write-enable request.  Using pmap_enter is the only way
 * we can ensure the change takes place properly.
 */
void
uvm_chgkprot(addr, len, rw)
	register caddr_t addr;
	size_t len;
	int rw;
{
	vm_prot_t prot;
	paddr_t pa;
	vaddr_t sva, eva;

	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
	eva = round_page(addr + len);
	for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
		/*
		 * Extract physical address for the page.
		 * We use a cheesy hack to differentiate physical
		 * page 0 from an invalid mapping, not that it
		 * really matters...
		 */
		pa = pmap_extract(pmap_kernel(), sva|1);
		if (pa == 0)
			panic("chgkprot: invalid page");
		pmap_enter(pmap_kernel(), sva, pa&~1, prot, TRUE);
	}
}
#endif

/*
 * uvm_vslock: wire user memory for I/O
 *
 * - called from physio and sys___sysctl
 * - XXXCDC: consider nuking this (or making it a macro?)
 */

void
uvm_vslock(p, addr, len)
	struct proc *p;
	caddr_t	addr;
	size_t	len;
{
	uvm_fault_wire(&p->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr+len));
}

/*
 * uvm_vsunlock: unwire user memory wired by uvm_vslock
 *
 * - called from physio and sys___sysctl
 * - XXXCDC: consider nuking this (or making it a macro?)
 */

void
uvm_vsunlock(p, addr, len)
	struct proc *p;
	caddr_t	addr;
	size_t	len;
{
	uvm_fault_unwire(p->p_vmspace->vm_map.pmap, trunc_page(addr),
	    round_page(addr+len));
}
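
/*
 * Illustrative sketch (not from the original source): a raw-I/O path
 * such as physio() is expected to bracket a device transfer with the
 * pair above so the user pages stay resident while the driver runs.
 * With hypothetical locals "p", "base" and "nbytes":
 *
 *	uvm_vslock(p, base, nbytes);	/+ wire: fault in and hold pages +/
 *	... start the device I/O and wait for it to complete ...
 *	uvm_vsunlock(p, base, nbytes);	/+ unwire: pages pageable again +/
 */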

/*
 * uvm_fork: fork a virtual address space
 *
 * - the address space is copied as per parent map's inherit values
 * - a new "user" structure is allocated for the child process
 *	[filled in by MD layer...]
 * - NOTE: the kernel stack may be at a different location in the child
 *	process, and thus addresses of automatic variables may be invalid
 *	after cpu_fork returns in the child process.  We do nothing here
 *	after cpu_fork returns.
 * - XXXCDC: we need a way for this to return a failure value rather
 *   than just hang
 */
void
uvm_fork(p1, p2, shared)
	struct proc *p1, *p2;
	boolean_t shared;
{
	struct user *up = p2->p_addr;
	int rv;

	if (shared == TRUE)
		uvmspace_share(p1, p2);			/* share vmspace */
	else
		p2->p_vmspace = uvmspace_fork(p1->p_vmspace); /* fork vmspace */

	/*
	 * Wire down the U-area for the process, which contains the PCB
	 * and the kernel stack.  Wired state is stored in p->p_flag's
	 * P_INMEM bit rather than in the vm_map_entry's wired count
	 * to prevent kernel_map fragmentation.
	 */
	rv = uvm_fault_wire(kernel_map, (vaddr_t)up,
	    (vaddr_t)up + USPACE);
	if (rv != KERN_SUCCESS)
		panic("uvm_fork: uvm_fault_wire failed: %d", rv);

	/*
	 * p_stats and p_sigacts currently point at fields in the user
	 * struct, reached through p_addr rather than &u.  Copy
	 * p_sigacts and parts of p_stats; zero the rest of p_stats
	 * (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	memset(&up->u_stats.pstat_startzero, 0,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
	    (caddr_t)&up->u_stats.pstat_startzero));
	memcpy(&up->u_stats.pstat_startcopy, &p1->p_stats->pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
	    (caddr_t)&up->u_stats.pstat_startcopy));

	/*
	 * cpu_fork will copy and update the kernel stack and pcb, and make
	 * the child ready to run.  The child will exit directly to user
	 * mode on its first time slice, and will not return here.
	 */
	cpu_fork(p1, p2);
}

/*
 * uvm_exit: exit a virtual address space
 *
 * - the process passed to us is a dead (pre-zombie) process; we
 *   are running on a different context now (the reaper).
 * - we must run in a separate thread because freeing the vmspace
 *   of the dead process may block.
 */
void
uvm_exit(p)
	struct proc *p;
{

	uvmspace_free(p->p_vmspace);
	uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE);
}

/*
 * uvm_init_limits: init per-process VM limits
 *
 * - called for process 0 and then inherited by all others.
 */
void
uvm_init_limits(p)
	struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.  Set the maximum
	 * resident set size to be all of (reasonably) available memory.
	 * This causes any single, large process to start random page
	 * replacement once it fills memory.
	 */

	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
}

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define	SDB_SWAPIN	2
#define	SDB_SWAPOUT	4
#endif

/*
 * uvm_swapin: swap in a process's u-area.
 */

void
uvm_swapin(p)
	struct proc *p;
{
	vaddr_t addr;
	int s;

	addr = (vaddr_t)p->p_addr;
	/* make P_INMEM true */
	uvm_fault_wire(kernel_map, addr, addr + USPACE);

	/*
	 * Some architectures need to be notified when the user area has
	 * moved to new physical page(s) (e.g. see mips/mips/vm_machdep.c).
	 */
	cpu_swapin(p);
	s = splstatclock();
	if (p->p_stat == SRUN)
		setrunqueue(p);
	p->p_flag |= P_INMEM;
	splx(s);
	p->p_swtime = 0;
	++uvmexp.swapins;
}

/*
 * uvm_scheduler: process zero main loop
 *
 * - attempt to swap in every swapped-out, runnable process in order of
 *   priority.
 * - if not enough memory, wake the pagedaemon and let it clear space.
 */

void
uvm_scheduler()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	UVMHIST_FUNC("uvm_scheduler"); UVMHIST_CALLED(maphist);

loop:
#ifdef DEBUG
	while (!enableswap)
		tsleep((caddr_t)&proc0, PVM, "noswap", 0);
#endif
	pp = NULL;		/* process to choose */
	ppri = INT_MIN;		/* its priority */
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {

		/* is it a runnable swapped out process? */
		if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
			pri = p->p_swtime + p->p_slptime -
			    (p->p_nice - NZERO) * 8;
			if (pri > ppri) {   /* higher priority?  remember it. */
				pp = p;
				ppri = pri;
			}
		}
	}

#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("scheduler: running, procp %p pri %d\n", pp, ppri);
#endif
	/*
	 * Nothing to do, back to sleep
	 */
	if ((p = pp) == NULL) {
		tsleep((caddr_t)&proc0, PVM, "scheduler", 0);
		goto loop;
	}

	/*
	 * we have found a swapped-out process which we would like to
	 * bring back in.
	 *
	 * XXX: this part is really bogus because we could deadlock on
	 * memory despite our feeble check
	 */
	if (uvmexp.free > atop(USPACE)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%p, pri %d free %d\n",
			    p->p_pid, p->p_comm, p->p_addr, ppri, uvmexp.free);
#endif
		uvm_swapin(p);
		goto loop;
	}
	/*
	 * not enough memory, jab the pageout daemon and wait until the
	 * coast is clear
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("scheduler: no room for pid %d(%s), free %d\n",
		    p->p_pid, p->p_comm, uvmexp.free);
#endif
	(void) splhigh();
	uvm_wait("schedpwait");
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("scheduler: room again, free %d\n", uvmexp.free);
#endif
	goto loop;
}
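
/*
 * Illustrative note (not part of the original source): the swap-in
 * priority computed in the loop above is
 *
 *	pri = p_swtime + p_slptime - (p_nice - NZERO) * 8
 *
 * With hypothetical values, a process swapped out for 20 seconds and
 * asleep for 5 at default nice (p_nice == NZERO) scores
 * pri = 20 + 5 - 0 = 25; the same process reniced by +4 scores
 * 25 - 32 = -7.  Long-swapped, long-sleeping processes are thus
 * favored for swap-in, while positively reniced ones are penalized.
 */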

/*
 * swappable: is process "p" swappable?
 */

#define	swappable(p)							\
	(((p)->p_flag & (P_SYSTEM | P_INMEM | P_WEXIT)) == P_INMEM &&	\
	 (p)->p_holdcnt == 0)
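
/*
 * Illustrative note (not part of the original source): the mask above
 * requires exactly P_INMEM of the three flags to be set, so e.g. a
 * kernel thread (P_SYSTEM set), an exiting process (P_WEXIT set), or a
 * process whose u-area is already swapped out (P_INMEM clear) never
 * qualifies; a nonzero hold count (p_holdcnt) vetoes swapping as well.
 */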

/*
 * swapout_threads: find threads that can be swapped and unwire their
 *	u-areas.
 *
 * - called by the pagedaemon
 * - try to swap out at least one process
 * - processes that are sleeping or stopped for maxslp or more seconds
 *   are swapped... otherwise the longest-sleeping or stopped process
 *   is swapped, otherwise the longest resident process...
 */
void
uvm_swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;
	/* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */

#ifdef DEBUG
	if (!enableswap)
		return;
#endif

	/*
	 * outp/outpri  : stop/sleep process with largest sleeptime < maxslp
	 * outp2/outpri2: the longest resident process (its swap time)
	 */
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
			if (p->p_swtime > outpri2) {
				outp2 = p;
				outpri2 = p->p_swtime;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime >= maxslp) {
				uvm_swapout(p);			/* zap! */
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}

	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are really low on memory since we don't gain much by
	 * doing it (USPACE bytes).
	 */
	if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE))) {
		if ((p = outp) == NULL)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %p\n", p);
#endif
		if (p)
			uvm_swapout(p);
	}
}

/*
 * uvm_swapout: swap out process "p"
 *
 * - currently "swapout" means "unwire U-area" and "pmap_collect()"
 *   the pmap.
 * - XXXCDC: should deactivate all of the process's private anonymous
 *   memory
 */

static void
uvm_swapout(p)
	register struct proc *p;
{
	vaddr_t addr;
	int s;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%p, stat %x pri %d free %d\n",
		    p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		    p->p_slptime, uvmexp.free);
#endif

	/*
	 * Do any machine-specific actions necessary before swapout.
	 * This can include saving floating point state, etc.
	 */
	cpu_swapout(p);

	/*
	 * Unwire the to-be-swapped process's user struct and kernel stack.
	 */
	addr = (vaddr_t)p->p_addr;
	uvm_fault_unwire(kernel_map->pmap, addr, addr + USPACE); /* !P_INMEM */
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));

	/*
	 * Mark it as (potentially) swapped out.
	 */
	s = splstatclock();
	p->p_flag &= ~P_INMEM;
	if (p->p_stat == SRUN)
		remrunqueue(p);
	splx(s);
	p->p_swtime = 0;
	++uvmexp.swapouts;
}