/*	$NetBSD: vm_machdep.c,v 1.32 1997/10/18 00:17:21 gwr Exp $ */

/*
 * Copyright (c) 1996
 *	The President and Fellows of Harvard College. All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *	This product includes software developed by Harvard University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/core.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/exec.h>
#include <sys/vnode.h>
#include <sys/map.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/trap.h>

#include <sparc/sparc/cpuvar.h>

/*
 * Move pages from one kernel virtual address to another.
 */
void
pagemove(from, to, size)
	register caddr_t from, to;
	size_t size;
{
	register vm_offset_t pa;

	if (size & CLOFSET || (int)from & CLOFSET || (int)to & CLOFSET)
		panic("pagemove 1");
	while (size > 0) {
		pa = pmap_extract(pmap_kernel(), (vm_offset_t)from);
		if (pa == 0)
			panic("pagemove 2");
		pmap_remove(pmap_kernel(),
		    (vm_offset_t)from, (vm_offset_t)from + PAGE_SIZE);
		pmap_enter(pmap_kernel(),
		    (vm_offset_t)to, pa, VM_PROT_READ|VM_PROT_WRITE, 1);
		from += PAGE_SIZE;
		to += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
}

/*
 * Wrapper for dvma_mapin() in kernel space,
 * so drivers need not include VM goo to get at kernel_map.
 */
caddr_t
kdvma_mapin(va, len, canwait)
	caddr_t	va;
	int	len, canwait;
{
	return ((caddr_t)dvma_mapin(kernel_map, (vm_offset_t)va, len, canwait));
}

caddr_t
dvma_malloc(len, kaddr, flags)
	size_t	len;
	void	*kaddr;
	int	flags;
{
	vm_offset_t	kva;
	vm_offset_t	dva;
#if defined(SUN4M)
	extern int has_iocache;
#endif

	len = round_page(len);
	kva = (vm_offset_t)malloc(len, M_DEVBUF, flags);
	if (kva == NULL)
		return (NULL);

#if defined(SUN4M)
	if (!has_iocache)
#endif
		kvm_uncache((caddr_t)kva, len >> PGSHIFT);

	*(vm_offset_t *)kaddr = kva;
	dva = dvma_mapin(kernel_map, kva, len, (flags & M_NOWAIT) ? 0 : 1);
	if (dva == NULL) {
		free((void *)kva, M_DEVBUF);
		return (NULL);
	}
	return (caddr_t)dva;
}

void
dvma_free(dva, len, kaddr)
	caddr_t	dva;
	size_t	len;
	void	*kaddr;
{
	vm_offset_t	kva = *(vm_offset_t *)kaddr;

	dvma_mapout((vm_offset_t)dva, kva, round_page(len));
	free((void *)kva, M_DEVBUF);
}
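
/*
 * A minimal usage sketch for the pair above (hypothetical driver
 * code, for illustration only): dvma_malloc() returns the DVMA
 * address for the device and stores the CPU-visible kernel address
 * through `kaddr'; the same `kaddr' cookie is later handed back to
 * dvma_free().
 *
 *	void *kaddr;
 *	caddr_t dva;
 *
 *	dva = dvma_malloc(len, &kaddr, M_NOWAIT);
 *	if (dva == NULL)
 *		return (ENOMEM);
 *	...point the device at `dva'; the CPU touches the
 *	   memory through `kaddr' (uncached unless the machine
 *	   has a working I/O cache)...
 *	dvma_free(dva, len, &kaddr);
 */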

u_long dvma_cachealign = 0;

/*
 * Map a range [va, va+len] of wired virtual addresses in the given map
 * to a kernel address in DVMA space.
 */
vm_offset_t
dvma_mapin(map, va, len, canwait)
	struct vm_map	*map;
	vm_offset_t	va;
	int		len, canwait;
{
	vm_offset_t	kva, tva;
	register int npf, s;
	register vm_offset_t pa;
	long off, pn;
	vm_offset_t ova;
	int olen;

	ova = va;
	olen = len;

	off = (int)va & PGOFSET;
	va -= off;
	len = round_page(len + off);
	npf = btoc(len);

	s = splimp();
	for (;;) {

		if (dvma_cachealign) {
			int m = (dvma_cachealign >> PGSHIFT);
			int palign = (va >> PGSHIFT) & (m - 1);
			long basepn;

			/*
			 * Find a DVMA address that's congruent to VA
			 * modulo the cache size.  This need only be
			 * done on machines with virtually indexed
			 * caches capable of DVMA coherency
			 * (e.g. Hypersparc).
			 *
			 * XXX - there should be a better way..
			 */
			basepn = pn = rmalloc(dvmamap, npf + m - 1);
			if (pn != 0) {
				pn += (palign + 1 + m - pn) & (m - 1);

				/* Free excess resources */
				if (pn != basepn)
					rmfree(dvmamap, pn - basepn, basepn);
				if (pn != basepn + (m - 1))
					rmfree(dvmamap, m - 1 - (pn - basepn),
						pn + npf);
				break;
			}

		} else {
			pn = rmalloc(dvmamap, npf);
			if (pn != 0)
				break;
		}
		if (canwait) {
			(void)tsleep(dvmamap, PRIBIO+1, "physio", 0);
			continue;
		}
		splx(s);
		return NULL;
	}
	splx(s);

	kva = tva = rctov(pn);

	while (npf--) {
		pa = pmap_extract(vm_map_pmap(map), va);
		if (pa == 0)
			panic("dvma_mapin: null page frame");
		pa = trunc_page(pa);

#if defined(SUN4M)
		if (CPU_ISSUN4M) {
			iommu_enter(tva, pa);
		} else
#endif
		{
			/*
			 * pmap_enter distributes this mapping to all
			 * contexts... maybe we should avoid this extra work
			 */
#ifdef notyet
#if defined(SUN4)
			if (have_iocache)
				pa |= PG_IOC;
#endif
#endif
			pmap_enter(pmap_kernel(), tva,
			    pa | PMAP_NC,
			    VM_PROT_READ|VM_PROT_WRITE, 1);
		}

		tva += PAGE_SIZE;
		va += PAGE_SIZE;
	}

	/*
	 * XXX Only have to do this on write.
	 */
	if (CACHEINFO.c_vactype == VAC_WRITEBACK)	/* XXX */
		cpuinfo.cache_flush((caddr_t)ova, olen);	/* XXX */

	return kva + off;
}

/*
 * Remove double map of `va' in DVMA space at `kva'.
 */
void
dvma_mapout(kva, va, len)
	vm_offset_t	kva, va;
	int		len;
{
	register int s, off;

	off = (int)kva & PGOFSET;
	kva -= off;
	len = round_page(len + off);

#if defined(SUN4M)
	if (cputyp == CPU_SUN4M)
		iommu_remove(kva, len);
	else
#endif
		pmap_remove(pmap_kernel(), kva, kva + len);

	s = splimp();
	rmfree(dvmamap, btoc(len), vtorc(kva));
	wakeup(dvmamap);
	splx(s);

	if (CACHEINFO.c_vactype != VAC_NONE)
		cpuinfo.cache_flush((caddr_t)va, len);
}
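
/*
 * Sketch of the intended pairing (hypothetical caller, for
 * illustration only): a wired kernel buffer is double-mapped into
 * DVMA space for the duration of a transfer and unmapped afterwards.
 * kdvma_mapin() and dvma_malloc() above are just conveniences over
 * this same pair.
 *
 *	vm_offset_t dva;
 *
 *	dva = dvma_mapin(kernel_map, (vm_offset_t)buf, len, 1);
 *	if (dva == NULL)
 *		...only possible if we could not wait...
 *	...start the device on `dva', wait for completion...
 *	dvma_mapout(dva, (vm_offset_t)buf, len);
 */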

/*
 * Map an IO request into kernel virtual address space.
 */
void
vmapbuf(bp, len)
	struct buf *bp;
	vm_size_t len;
{
	struct pmap *upmap, *kpmap;
	vm_offset_t uva;	/* User VA (map from) */
	vm_offset_t kva;	/* Kernel VA (new to) */
	vm_offset_t pa; 	/* physical address */
	vm_size_t off;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vmapbuf");

	/*
	 * XXX:  It might be better to round/trunc to a
	 * segment boundary to avoid VAC problems!
	 */
	bp->b_saveaddr = bp->b_data;
	uva = trunc_page(bp->b_data);
	off = (vm_offset_t)bp->b_data - uva;
	len = round_page(off + len);
	kva = kmem_alloc_wait(kernel_map, len);
	bp->b_data = (caddr_t)(kva + off);

	/*
	 * We have to flush any write-back cache on the
	 * user-space mappings so our new mappings will
	 * have the correct contents.
	 */
	if (CACHEINFO.c_vactype != VAC_NONE)
		cpuinfo.cache_flush((caddr_t)uva, len);

	upmap = vm_map_pmap(&bp->b_proc->p_vmspace->vm_map);
	kpmap = vm_map_pmap(kernel_map);
	do {
		pa = pmap_extract(upmap, uva);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		/* Now map the page into kernel space. */
		pmap_enter(kpmap, kva, pa | PMAP_NC,
		    VM_PROT_READ|VM_PROT_WRITE, TRUE);
		uva += PAGE_SIZE;
		kva += PAGE_SIZE;
		len -= PAGE_SIZE;
	} while (len);
}

/*
 * Free the mappings associated with this I/O operation.
 */
void
vunmapbuf(bp, len)
	struct buf *bp;
	vm_size_t len;
{
	vm_offset_t kva;
	vm_size_t off;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");

	kva = trunc_page(bp->b_data);
	off = (vm_offset_t)bp->b_data - kva;
	len = round_page(off + len);

	/* This will call pmap_remove() for us. */
	kmem_free_wakeup(kernel_map, kva, len);
	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = NULL;

#if 0	/* XXX: The flush above is sufficient, right? */
	if (CACHEINFO.c_vactype != VAC_NONE)
		cpuinfo.cache_flush(bp->b_data, len);
#endif
}
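
/*
 * In outline, the two routines above bracket raw-device (B_PHYS)
 * transfers; the usual caller is physio().  Very roughly (a
 * simplified sketch, not the actual physio() code):
 *
 *	bp->b_flags |= B_PHYS | ...;
 *	vmapbuf(bp, len);	...b_data now points at the non-cached
 *				   kernel double map of the user pages...
 *	(*strategy)(bp);
 *	...sleep until biodone()...
 *	vunmapbuf(bp, len);	...restores b_data from b_saveaddr...
 */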

/*
 * The offset of the topmost frame in the kernel stack.
 */
#define	TOPFRAMEOFF (USPACE-sizeof(struct trapframe)-sizeof(struct frame))

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, making the child ready to run, and marking
 * it so that it can return differently than the parent.
 *
 * This function relies on the fact that the pcb is
 * the first element in struct user.
 */
void
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct pcb *opcb = &p1->p_addr->u_pcb;
	register struct pcb *npcb = &p2->p_addr->u_pcb;
	register struct trapframe *tf2;
	register struct rwindow *rp;

	/*
	 * Save all user registers to p1's stack or, in the case of
	 * user registers and invalid stack pointers, to opcb.
	 * We then copy the whole pcb to p2; when switch() selects p2
	 * to run, it will run at the `proc_trampoline' stub, rather
	 * than returning at the copying code below.
	 *
	 * If process p1 has an FPU state, we must copy it.  If it is
	 * the FPU user, we must save the FPU state first.
	 */

	write_user_windows();
	opcb->pcb_psr = getpsr();
	bcopy((caddr_t)opcb, (caddr_t)npcb, sizeof(struct pcb));
	if (p1->p_md.md_fpstate) {
		if (p1 == fpproc)
			savefpstate(p1->p_md.md_fpstate);
		p2->p_md.md_fpstate = malloc(sizeof(struct fpstate),
		    M_SUBPROC, M_WAITOK);
		bcopy(p1->p_md.md_fpstate, p2->p_md.md_fpstate,
		    sizeof(struct fpstate));
	} else
		p2->p_md.md_fpstate = NULL;

	/*
	 * Set up the (kernel) stack frame that will carry the child
	 * out of the kernel.  (The trap frame invariably resides at
	 * the tippity-top of the u. area.)
	 */
	tf2 = p2->p_md.md_tf = (struct trapframe *)
			((int)npcb + USPACE - sizeof(*tf2));

	/* Copy parent's trapframe */
	*tf2 = *(struct trapframe *)((int)opcb + USPACE - sizeof(*tf2));

	/* Duplicate efforts of syscall(), but slightly differently */
	if (tf2->tf_global[1] & SYSCALL_G2RFLAG) {
		/* jmp %g2 (or %g7, deprecated) on success */
		tf2->tf_npc = tf2->tf_global[2];
	} else {
		/*
		 * old system call convention: clear C on success
		 * note: proc_trampoline() sets a fresh psr when
		 * returning to user mode.
		 */
		/*tf2->tf_psr &= ~PSR_C; -* success */
	}

	/* Set return values in child mode */
	tf2->tf_out[0] = 0;
	tf2->tf_out[1] = 1;

	/* Construct kernel frame to return to in cpu_switch() */
	rp = (struct rwindow *)((u_int)npcb + TOPFRAMEOFF);
	rp->rw_local[0] = (int)child_return;	/* Function to call */
	rp->rw_local[1] = (int)p2;		/* and its argument */

	npcb->pcb_pc = (int)proc_trampoline - 8;
	npcb->pcb_sp = (int)rp;
	npcb->pcb_psr &= ~PSR_CWP;	/* Run in window #0 */
	npcb->pcb_wim = 1;		/* Fence at window #1 */
}

/*
 * cpu_set_kpc:
 *
 * Arrange for in-kernel execution of a process to continue at the
 * named pc, as if the code at that address were called as a function
 * with the current process's process pointer as an argument.
 *
 * Note that it's assumed that when the named process returns,
 * we immediately return to user mode.
 *
 * (Note that cpu_fork(), above, uses an open-coded version of this.)
 */
void
cpu_set_kpc(p, pc)
	struct proc *p;
	void (*pc) __P((struct proc *));
{
	struct pcb *pcb;
	struct rwindow *rp;

	pcb = &p->p_addr->u_pcb;

	rp = (struct rwindow *)((u_int)pcb + TOPFRAMEOFF);
	rp->rw_local[0] = (int)pc;		/* Function to call */
	rp->rw_local[1] = (int)p;		/* and its argument */

	/*
	 * Frob PCB:
	 *	- arrange to return to proc_trampoline() from cpu_switch()
	 *	- point it at the stack frame constructed above
	 *	- make it run in a clear set of register windows
	 */
	pcb->pcb_pc = (int)proc_trampoline - 8;
	pcb->pcb_sp = (int)rp;
	pcb->pcb_psr &= ~PSR_CWP;	/* Run in window #0 */
	pcb->pcb_wim = 1;		/* Fence at window #1 */
}
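
/*
 * For reference, the frame both cpu_fork() and cpu_set_kpc() build
 * in the process's u. area looks roughly like this (a sketch
 * inferred from the code above; addresses grow upward):
 *
 *	pcb + USPACE			top of u. area
 *	pcb + USPACE - sizeof(struct trapframe)
 *					trapframe (in cpu_fork(): a copy
 *					of the parent's, with tf_out[0]/
 *					tf_out[1] set to the child's
 *					return values)
 *	pcb + TOPFRAMEOFF		struct rwindow:
 *					rw_local[0] = function to call,
 *					rw_local[1] = its argument
 *	pcb				pcb_sp points at the rwindow,
 *					pcb_pc at proc_trampoline - 8
 */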

/*
 * cpu_exit is called as the last action during exit.
 * We release the address space and machine-dependent resources,
 * including the memory for the user structure and kernel stack.
 * Since the latter is also the interrupt stack, we release it
 * from assembly code after switching to a temporary pcb+stack.
 */
void
cpu_exit(p)
	struct proc *p;
{
	register struct fpstate *fs;

	if ((fs = p->p_md.md_fpstate) != NULL) {
		if (p == fpproc) {
			savefpstate(fs);
			fpproc = NULL;
		}
		free((void *)fs, M_SUBPROC);
	}
	vmspace_free(p->p_vmspace);
	switchexit(kernel_map, p->p_addr, USPACE);
	/* NOTREACHED */
}

/*
 * cpu_coredump is called to write a core dump header.
 * (should this be defined elsewhere?  machdep.c?)
 */
int
cpu_coredump(p, vp, cred, chdr)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
	struct core *chdr;
{
	int error;
	struct md_coredump md_core;
	struct coreseg cseg;

	CORE_SETMAGIC(*chdr, COREMAGIC, MID_SPARC, 0);
	chdr->c_hdrsize = ALIGN(sizeof(*chdr));
	chdr->c_seghdrsize = ALIGN(sizeof(cseg));
	chdr->c_cpusize = sizeof(md_core);

	md_core.md_tf = *p->p_md.md_tf;
	if (p->p_md.md_fpstate) {
		if (p == fpproc)
			savefpstate(p->p_md.md_fpstate);
		md_core.md_fpstate = *p->p_md.md_fpstate;
	} else
		bzero((caddr_t)&md_core.md_fpstate, sizeof(struct fpstate));

	CORE_SETMAGIC(cseg, CORESEGMAGIC, MID_SPARC, CORE_CPU);
	cseg.c_addr = 0;
	cseg.c_size = chdr->c_cpusize;
	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&cseg, chdr->c_seghdrsize,
	    (off_t)chdr->c_hdrsize, UIO_SYSSPACE,
	    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
	if (error)
		return error;

	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&md_core, sizeof(md_core),
	    (off_t)(chdr->c_hdrsize + chdr->c_seghdrsize), UIO_SYSSPACE,
	    IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p);
	if (!error)
		chdr->c_nseg++;

	return error;
}