/*	$NetBSD: vm_machdep.c,v 1.29 1996/10/28 23:02:54 pk Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *	This product includes software developed by Harvard University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/core.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/exec.h>
#include <sys/vnode.h>
#include <sys/map.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/trap.h>

#include <sparc/sparc/cache.h>

/*
 * Move pages from one kernel virtual address to another.
 */
void
pagemove(from, to, size)
	register caddr_t from, to;
	size_t size;
{
	register vm_offset_t pa;

	if (size & CLOFSET || (int)from & CLOFSET || (int)to & CLOFSET)
		panic("pagemove 1");
	while (size > 0) {
		pa = pmap_extract(pmap_kernel(), (vm_offset_t)from);
		if (pa == 0)
			panic("pagemove 2");
		pmap_remove(pmap_kernel(),
		    (vm_offset_t)from, (vm_offset_t)from + PAGE_SIZE);
		pmap_enter(pmap_kernel(),
		    (vm_offset_t)to, pa, VM_PROT_READ|VM_PROT_WRITE, 1);
		from += PAGE_SIZE;
		to += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
}
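
/*
 * Example of the above contract (hypothetical caller; assumes CLSIZE == 1,
 * so that CLOFSET == PGOFSET):
 *
 *	pagemove(oldbuf, newbuf, 2 * PAGE_SIZE);
 *
 * remaps the two physical pages backing `oldbuf' at `newbuf'; afterwards
 * the old range is unmapped and the data is visible at the new address.
 * Both addresses and the size must be cluster-aligned, or the routine
 * panics.
 */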

/*
 * Wrapper for dvma_mapin() in kernel space,
 * so drivers need not include VM goo to get at kernel_map.
 */
caddr_t
kdvma_mapin(va, len, canwait)
	caddr_t	va;
	int	len, canwait;
{
	return ((caddr_t)dvma_mapin(kernel_map, (vm_offset_t)va, len, canwait));
}

caddr_t
dvma_malloc(len, kaddr, flags)
	size_t	len;
	void	*kaddr;
	int	flags;
{
	vm_offset_t	kva;
	vm_offset_t	dva;
#if defined(SUN4M)
	extern int has_iocache;
#endif

	len = round_page(len);
	kva = (vm_offset_t)malloc(len, M_DEVBUF, flags);
	if (kva == NULL)
		return (NULL);

#if defined(SUN4M)
	if (!has_iocache)
#endif
		kvm_uncache((caddr_t)kva, len >> PGSHIFT);

	*(vm_offset_t *)kaddr = kva;
	dva = dvma_mapin(kernel_map, kva, len, (flags & M_NOWAIT) ? 0 : 1);
	if (dva == NULL) {
		free((void *)kva, M_DEVBUF);
		return (NULL);
	}
	return (caddr_t)dva;
}

void
dvma_free(dva, len, kaddr)
	caddr_t	dva;
	size_t	len;
	void	*kaddr;
{
	vm_offset_t	kva = *(vm_offset_t *)kaddr;

	dvma_mapout((vm_offset_t)dva, kva, round_page(len));
	free((void *)kva, M_DEVBUF);
}
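
/*
 * Typical use of the pair above (a hypothetical driver sketch; the
 * `sc' fields and BUFSIZE are made up for illustration):
 *
 *	void *kva;
 *	caddr_t dva;
 *
 *	dva = dvma_malloc(BUFSIZE, &kva, M_NOWAIT);
 *	if (dva == NULL)
 *		return (ENOMEM);
 *	sc->sc_buf = (caddr_t)kva;	the CPU uses the kernel address
 *	sc->sc_dvma = dva;		the device DMAs to the DVMA address
 *	...
 *	dvma_free(dva, BUFSIZE, &kva);
 */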

/*
 * Map a range [va, va+len] of wired virtual addresses in the given map
 * to a kernel address in DVMA space.
 */
vm_offset_t
dvma_mapin(map, va, len, canwait)
	struct vm_map	*map;
	vm_offset_t	va;
	int		len, canwait;
{
	vm_offset_t	kva, tva;
	register int npf, s;
	register vm_offset_t pa;
	long off, pn;
	vm_offset_t ova;
	int olen;

	ova = va;
	olen = len;

	off = (int)va & PGOFSET;
	va -= off;
	len = round_page(len + off);
	npf = btoc(len);

	s = splimp();
	for (;;) {

		pn = rmalloc(dvmamap, npf);

		if (pn != 0)
			break;
		if (canwait) {
			(void)tsleep(dvmamap, PRIBIO+1, "physio", 0);
			continue;
		}
		splx(s);
		return NULL;
	}
	splx(s);

	kva = tva = rctov(pn);

	while (npf--) {
		pa = pmap_extract(vm_map_pmap(map), va);
		if (pa == 0)
			panic("dvma_mapin: null page frame");
		pa = trunc_page(pa);

#if defined(SUN4M)
		if (CPU_ISSUN4M) {
			iommu_enter(tva, pa);
		} else
#endif
		{
			/*
			 * pmap_enter distributes this mapping to all
			 * contexts... maybe we should avoid this extra work
			 */
#ifdef notyet
#if defined(SUN4)
			if (has_iocache)
				pa |= PG_IOC;
#endif
#endif
			pmap_enter(pmap_kernel(), tva,
			    pa | PMAP_NC,
			    VM_PROT_READ|VM_PROT_WRITE, 1);
		}

		tva += PAGE_SIZE;
		va += PAGE_SIZE;
	}

	/*
	 * XXX Only have to do this on write.
	 */
	if (vactype == VAC_WRITEBACK)	/* XXX */
		cache_flush((caddr_t)ova, olen);	/* XXX */

	return kva + off;
}

/*
 * Remove double map of `va' in DVMA space at `kva'.
 */
void
dvma_mapout(kva, va, len)
	vm_offset_t	kva, va;
	int		len;
{
	register int s, off;

	off = (int)kva & PGOFSET;
	kva -= off;
	len = round_page(len + off);

#if defined(SUN4M)
	if (cputyp == CPU_SUN4M)
		iommu_remove(kva, len);
	else
#endif
		pmap_remove(pmap_kernel(), kva, kva + len);

	s = splimp();
	rmfree(dvmamap, btoc(len), vtorc(kva));
	wakeup(dvmamap);
	splx(s);

	if (vactype != VAC_NONE)
		cache_flush((caddr_t)va, len);
}
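
/*
 * Worked example of dvma_mapin()'s alignment arithmetic (assuming
 * 4096-byte pages): for va = 0x2005234 and len = 0x1800, off = 0x234,
 * so the request is rounded to round_page(0x1800 + 0x234) = 0x2000
 * bytes, i.e. npf = 2 DVMA pages, and the caller receives the address
 * of the first DVMA page plus 0x234.
 */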

/*
 * Map an IO request into kernel virtual address space.
 */
/*ARGSUSED*/
void
vmapbuf(bp, sz)
	register struct buf *bp;
	vm_size_t sz;
{
	register vm_offset_t addr, kva, pa;
	register vm_size_t size, off;
	register int npf;
	struct proc *p;
	register struct vm_map *map;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");
	p = bp->b_proc;
	map = &p->p_vmspace->vm_map;
	bp->b_saveaddr = bp->b_data;
	addr = (vm_offset_t)bp->b_saveaddr;
	off = addr & PGOFSET;
	size = round_page(bp->b_bcount + off);
	kva = kmem_alloc_wait(kernel_map, size);
	bp->b_data = (caddr_t)(kva + off);
	addr = trunc_page(addr);
	npf = btoc(size);
	while (npf--) {
		pa = pmap_extract(vm_map_pmap(map), (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");

		/*
		 * pmap_enter distributes this mapping to all
		 * contexts... maybe we should avoid this extra work
		 */
		pmap_enter(pmap_kernel(), kva,
		    pa | PMAP_NC,
		    VM_PROT_READ|VM_PROT_WRITE, 1);

		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
}

/*
 * Free the io map addresses associated with this IO operation.
 */
/*ARGSUSED*/
void
vunmapbuf(bp, sz)
	register struct buf *bp;
	vm_size_t sz;
{
	register vm_offset_t kva = (vm_offset_t)bp->b_data;
	register vm_size_t size, off;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	off = kva & PGOFSET;
	size = round_page(bp->b_bcount + off);
	kmem_free_wakeup(kernel_map, trunc_page(kva), size);
	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
	if (vactype != VAC_NONE)
		cache_flush(bp->b_un.b_addr, bp->b_bcount - bp->b_resid);
}

/*
 * The offset of the topmost frame in the kernel stack.
 */
#define	TOPFRAMEOFF (USPACE-sizeof(struct trapframe)-sizeof(struct frame))

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, making the child ready to run, and marking
 * it so that it can return differently than the parent.
 *
 * This function relies on the fact that the pcb is
 * the first element in struct user.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct pcb *opcb = &p1->p_addr->u_pcb;
	register struct pcb *npcb = &p2->p_addr->u_pcb;
	register struct trapframe *tf2;
	register struct rwindow *rp;

	/*
	 * Save all user registers to p1's stack or, in the case of
	 * user registers and invalid stack pointers, to opcb.
	 * We then copy the whole pcb to p2; when switch() selects p2
	 * to run, it will run at the `proc_trampoline' stub, rather
	 * than returning at the copying code below.
	 *
	 * If process p1 has an FPU state, we must copy it.  If it is
	 * the FPU user, we must save the FPU state first.
	 */

	write_user_windows();
	opcb->pcb_psr = getpsr();
	bcopy((caddr_t)opcb, (caddr_t)npcb, sizeof(struct pcb));
	if (p1->p_md.md_fpstate) {
		if (p1 == fpproc)
			savefpstate(p1->p_md.md_fpstate);
		p2->p_md.md_fpstate = malloc(sizeof(struct fpstate),
		    M_SUBPROC, M_WAITOK);
		bcopy(p1->p_md.md_fpstate, p2->p_md.md_fpstate,
		    sizeof(struct fpstate));
	} else
		p2->p_md.md_fpstate = NULL;

	/*
	 * Set up the (kernel) stack frame that will by-pass the child
	 * out of the kernel.  (The trap frame invariably resides at
	 * the tippity-top of the u. area.)
	 */
	tf2 = p2->p_md.md_tf = (struct trapframe *)
	    ((int)npcb + USPACE - sizeof(*tf2));

	/* Copy parent's trapframe */
	*tf2 = *(struct trapframe *)((int)opcb + USPACE - sizeof(*tf2));

	/* Duplicate efforts of syscall(), but slightly differently */
	if (tf2->tf_global[1] & SYSCALL_G2RFLAG) {
		/* jmp %g2 (or %g7, deprecated) on success */
		tf2->tf_npc = tf2->tf_global[2];
	} else {
		/*
		 * old system call convention: clear C on success
		 * note: proc_trampoline() sets a fresh psr when
		 * returning to user mode.
		 */
		/*tf2->tf_psr &= ~PSR_C;   -* success */
	}

	/* Set return values in child mode */
	tf2->tf_out[0] = 0;
	tf2->tf_out[1] = 1;

	/* Construct kernel frame to return to in cpu_switch() */
	rp = (struct rwindow *)((u_int)npcb + TOPFRAMEOFF);
	rp->rw_local[0] = (int)child_return;	/* Function to call */
	rp->rw_local[1] = (int)p2;		/* and its argument */

	npcb->pcb_pc = (int)proc_trampoline - 8;
	npcb->pcb_sp = (int)rp;
	npcb->pcb_psr &= ~PSR_CWP;	/* Run in window #0 */
	npcb->pcb_wim = 1;		/* Fence at window #1 */
}
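
/*
 * Sketch of the kernel stack that cpu_fork() (above) and cpu_set_kpc()
 * (below) construct; the offsets follow from the TOPFRAMEOFF definition:
 *
 *	p_addr + USPACE:	+--------------------------+
 *				| struct trapframe         |  <- p_md.md_tf
 *	p_addr + TOPFRAMEOFF:	+--------------------------+
 *				| struct frame (rwindow)   |  <- pcb_sp
 *				|   rw_local[0] = function |
 *				|   rw_local[1] = argument |
 *				+--------------------------+
 *
 * When cpu_switch() resumes the process, it "returns" into
 * proc_trampoline, which calls the function in rw_local[0] with
 * rw_local[1] as its argument and then drops to user mode through
 * the trapframe.
 */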

/*
 * cpu_set_kpc:
 *
 * Arrange for in-kernel execution of a process to continue at the
 * named pc, as if the code at that address were called as a function
 * with the current process's process pointer as an argument.
 *
 * Note that it's assumed that when the named function returns,
 * we immediately return to user mode.
 *
 * (Note that cpu_fork(), above, uses an open-coded version of this.)
 */
void
cpu_set_kpc(p, pc)
	struct proc *p;
	void (*pc) __P((struct proc *));
{
	struct pcb *pcb;
	struct rwindow *rp;

	pcb = &p->p_addr->u_pcb;

	rp = (struct rwindow *)((u_int)pcb + TOPFRAMEOFF);
	rp->rw_local[0] = (int)pc;	/* Function to call */
	rp->rw_local[1] = (int)p;	/* and its argument */

	/*
	 * Frob PCB:
	 *	- arrange to return to proc_trampoline() from cpu_switch()
	 *	- point it at the stack frame constructed above
	 *	- make it run in a clear set of register windows
	 */
	pcb->pcb_pc = (int)proc_trampoline - 8;
	pcb->pcb_sp = (int)rp;
	pcb->pcb_psr &= ~PSR_CWP;	/* Run in window #0 */
	pcb->pcb_wim = 1;		/* Fence at window #1 */
}

/*
 * cpu_exit is called as the last action during exit.
 * We release the address space and machine-dependent resources,
 * including the memory for the user structure and kernel stack.
 * Since the latter is also the interrupt stack, we release it
 * from assembly code after switching to a temporary pcb+stack.
 */
void
cpu_exit(p)
	struct proc *p;
{
	register struct fpstate *fs;

	if ((fs = p->p_md.md_fpstate) != NULL) {
		if (p == fpproc) {
			savefpstate(fs);
			fpproc = NULL;
		}
		free((void *)fs, M_SUBPROC);
	}
	vmspace_free(p->p_vmspace);
	switchexit(kernel_map, p->p_addr, USPACE);
	/* NOTREACHED */
}

/*
 * cpu_coredump is called to write a core dump header.
 * (should this be defined elsewhere? machdep.c?)
 */
int
cpu_coredump(p, vp, cred, chdr)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
	struct core *chdr;
{
	int error;
	struct md_coredump md_core;
	struct coreseg cseg;

	CORE_SETMAGIC(*chdr, COREMAGIC, MID_SPARC, 0);
	chdr->c_hdrsize = ALIGN(sizeof(*chdr));
	chdr->c_seghdrsize = ALIGN(sizeof(cseg));
	chdr->c_cpusize = sizeof(md_core);

	md_core.md_tf = *p->p_md.md_tf;
	if (p->p_md.md_fpstate) {
		if (p == fpproc)
			savefpstate(p->p_md.md_fpstate);
		md_core.md_fpstate = *p->p_md.md_fpstate;
	} else
		bzero((caddr_t)&md_core.md_fpstate, sizeof(struct fpstate));

	CORE_SETMAGIC(cseg, CORESEGMAGIC, MID_SPARC, CORE_CPU);
	cseg.c_addr = 0;
	cseg.c_size = chdr->c_cpusize;
	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&cseg, chdr->c_seghdrsize,
	    (off_t)chdr->c_hdrsize, UIO_SYSSPACE,
	    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
	if (error)
		return error;

	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&md_core, sizeof(md_core),
	    (off_t)(chdr->c_hdrsize + chdr->c_seghdrsize), UIO_SYSSPACE,
	    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
	if (!error)
		chdr->c_nseg++;

	return error;
}
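
/*
 * Resulting core file layout, as a sketch (the machine-independent
 * coredump code is assumed to write `*chdr' itself and to append the
 * memory segments after the CPU segment written here):
 *
 *	offset 0:				struct core (chdr)
 *	offset c_hdrsize:			struct coreseg (CORE_CPU)
 *	offset c_hdrsize + c_seghdrsize:	struct md_coredump
 *						(trapframe, then fpstate)
 */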