/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_machdep.c	8.3 (Berkeley) 01/21/94
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/user.h>

#include <machine/cpu.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct user *up = p2->p_addr;
	int foo, offset, addr, i;
	extern char kstack[];
	extern int mvesp();

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * mi_switch compensates for this when the child eventually runs.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
	offset = mvesp() - (int)kstack;
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_md.md_regs = p1->p_md.md_regs;

	/*
	 * Wire top of address space of child to its u.
	 * First, fault in a page of pte's to map it.
	 */
	addr = trunc_page((u_int)vtopte(kstack));
	(void)vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
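	/*
	 * Double-map the child's u. pages (already resident in the kernel
	 * map at p2->p_addr) at kstack in the child's own pmap, wired,
	 * so the kernel stack appears at the same virtual address in the
	 * child as in the parent.
	 */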
	for (i = 0; i < UPAGES; i++)
		pmap_enter(&p2->p_vmspace->vm_pmap, (vm_offset_t)kstack+i*NBPG,
		    pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
		    VM_PROT_READ, 1);
	pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

	/*
	 * Arrange for a non-local goto when the new process
	 * is started, to resume here, returning nonzero from the
	 * setjmp-style savectx call.
	 */
	if (savectx(up, 1)) {
		/*
		 * Return 1 in child.
		 */
		return (1);
	}
	return (0);
}

#include "npx.h"
#if NNPX > 0
extern struct proc *npxproc;
#endif

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing thru an argument to the new stack.  Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by mi_switch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in mi_switch() by not having
 * a special case].
 */
struct proc *switch_to_inactive();
cpu_exit(p)
	register struct proc *p;
{
	static struct pcb nullpcb;	/* pcb to overwrite on last switch */

#if NNPX > 0
	/* free coprocessor (if we have it) */
	if (p == npxproc)
		npxproc = 0;
#endif

	/* move to inactive space and stack, passing arg across */
	p = switch_to_inactive(p);

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

	p->p_addr = (struct user *) &nullpcb;
	mi_switch();
	/* NOTREACHED */
}
#else
cpu_exit(p)
	register struct proc *p;
{

	/* free coprocessor (if we have it) */
#if NNPX > 0
	if (p == npxproc)
		npxproc = 0;
#endif

	curproc = p;
	mi_switch();
}

cpu_wait(p)
	struct proc *p;
{

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
}
#endif

/*
 * Dump the machine specific header information at the start of a core dump.
 */
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

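	/*
	 * The machine-dependent header is simply the whole u-area
	 * (user struct, pcb and kernel stack pages), written at the
	 * front of the core file.
	 */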
	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	/* eventually do this by setting up an expand-down stack segment
	   for ss0: selector, allowing stack access down to top of u.
	   this means though that protection violations need to be handled
	   thru a double fault exception that must do an integral task
	   switch to a known good context, within which a dump can be
	   taken.  a sensible scheme might be to save the initial context
	   used by sched (that has physical memory mapped 1:1 at bottom)
	   and take the dump while still in mapped mode */
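	/*
	 * In the meantime, a minimal sketch of the page-based red zone
	 * used on other ports (assuming PG_V from the i386 pte
	 * definitions; not enabled here) would invalidate the pte for
	 * the page just after the u. so a kernel stack overflow faults
	 * before reaching the user structure:
	 *
	 *	((u_int *)pte)[btoc(sizeof(struct user))] &= ~PG_V;
	 *	tlbflush();
	 */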
}

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = kvtopte(from);
	tpte = kvtopte(to);
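	/*
	 * For each page, copy the source pte into the destination slot
	 * and invalidate the source mapping; the tlbflush() afterwards
	 * discards any stale translations for either range.
	 */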
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	tlbflush();
}

/*
 * Convert kernel VA to physical address
 */
kvtop(addr)
	register caddr_t addr;
{
	vm_offset_t va;

	va = pmap_extract(kernel_pmap, (vm_offset_t)addr);
	if (va == 0)
		panic("kvtop: zero page frame");
	return((int)va);
}

#ifdef notdef
/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return(1);
	return(0);
}

probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	return(0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 * (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return(0);
	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0)
			return(0);
	ix = btop(addr-0xfe000000);
	cnt = btop(addr-0xfe000000+count+NBPG-1);
	if (cnt > (int)&Syssize)
		return(0);
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
			return(0);
	return(1);
}

useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count <= 0)
		return(0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return(0);
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	return(1);
}
#endif

extern vm_map_t phys_map;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr;
	register long flags = bp->b_flags;
	struct proc *p;
	int off;
	vm_offset_t kva;
	register vm_offset_t pa;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	addr = bp->b_saveaddr = bp->b_un.b_addr;
	off = (int)addr & PGOFSET;
	p = bp->b_proc;
	npf = btoc(round_page(bp->b_bcount + off));
	kva = kmem_alloc_wait(phys_map, ctob(npf));
	bp->b_un.b_addr = (caddr_t) (kva + off);
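	/*
	 * For each page of the caller's buffer, look up its physical
	 * address in the requesting process' pmap and enter a wired,
	 * read/write mapping for it at the corresponding offset in
	 * phys_map.
	 */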
	while (npf--) {
		pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		pmap_enter(vm_map_pmap(phys_map), kva, trunc_page(pa),
		    VM_PROT_READ|VM_PROT_WRITE, TRUE);
		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr = bp->b_un.b_addr;
	vm_offset_t kva;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET)));
	kva = (vm_offset_t)((int)addr & ~PGOFSET);
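	/*
	 * kmem_free_wakeup() both tears down the temporary mappings and
	 * wakes up anyone sleeping in kmem_alloc_wait() for phys_map space.
	 */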
	kmem_free_wakeup(phys_map, kva, ctob(npf));
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}
