/*	$NetBSD: vm_machdep.c,v 1.46 2023/10/06 11:53:27 skrll Exp $	*/

/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*-
 * Copyright (c) 1995 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.46 2023/10/06 11:53:27 skrll Exp $");

#include "opt_mtrr.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/core.h>
#include <sys/exec.h>
#include <sys/ptrace.h>

#include <uvm/uvm.h>

#include <machine/cpu.h>
#include <machine/gdt.h>
#include <machine/reg.h>
#include <machine/specialreg.h>

#ifdef MTRR
#include <machine/mtrr.h>
#endif

#include <x86/fpu.h>
#include <x86/dbregs.h>

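/* Backing pool for the per-LWP debug register state (see x86/dbregs.c). */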
extern struct pool x86_dbregspl;

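/*
 * cpu_proc_fork: MD side of fork(); the child process inherits the
 * parent's machine-dependent flags.
 */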
void
cpu_proc_fork(struct proc *p1, struct proc *p2)
{

	p2->p_md.md_flags = p1->p_md.md_flags;
}

/*
 * cpu_lwp_fork: finish creating a new LWP (l2).
 *
 * The first LWP (l1) is the one being forked from.  If it is &lwp0,
 * then we are creating a kthread, whose return path and argument are
 * specified with `func' and `arg'.
 *
 * If an alternate user-level stack is requested (with non-zero values
 * in both the stack and stacksize arguments), then set up the user stack
 * pointer accordingly.
 */
void
cpu_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize,
    void (*func)(void *), void *arg)
{
	struct pcb *pcb1, *pcb2;
	struct trapframe *tf;
	struct switchframe *sf;
	vaddr_t uv;

	KASSERT(l1 == curlwp || l1 == &lwp0);

	pcb1 = lwp_getpcb(l1);
	pcb2 = lwp_getpcb(l2);

	/* Copy the PCB from parent, except the FPU state. */
	memcpy(pcb2, pcb1, offsetof(struct pcb, pcb_savefpu));

	/* Fork the FPU state. */
	fpu_lwp_fork(l1, l2);

	/* Never inherit CPU Debug Registers. */
	pcb2->pcb_dbregs = NULL;
	pcb2->pcb_flags &= ~PCB_DBREGS;

#if defined(XENPV)
	pcb2->pcb_iopl = IOPL_KPL;
#endif

	/*
	 * Set the kernel stack address (from the address of the uarea) and
	 * trapframe address for the child.
	 *
	 * Rig the kernel stack so that it starts out in lwp_trampoline()
	 * and calls child_return() with l2 as an argument.  This causes the
	 * newly-created child process to go directly to user level with an
	 * apparent return value of 0 from fork(), while the parent process
	 * returns normally.
	 */
	uv = uvm_lwp_getuarea(l2);
	KASSERT(uv % PAGE_SIZE == 0);

#ifdef __x86_64__
#ifdef SVS
	pcb2->pcb_rsp0 = (uv + USPACE - PAGE_SIZE +
	    sizeof(struct trapframe));
	KASSERT((pcb2->pcb_rsp0 & 0xF) == 0);
#else
	pcb2->pcb_rsp0 = (uv + USPACE - 16);
#endif
	tf = (struct trapframe *)pcb2->pcb_rsp0 - 1;
#else
	pcb2->pcb_esp0 = (uv + USPACE - 16);
	tf = (struct trapframe *)pcb2->pcb_esp0 - 1;

	pcb2->pcb_iomap = NULL;
#endif
	l2->l_md.md_regs = tf;

	/*
	 * Copy the trapframe from the parent, so that the return to
	 * userspace will be to the right address, with the correct
	 * registers.
	 */
	memcpy(tf, l1->l_md.md_regs, sizeof(struct trapframe));

	/* The child LWP might get aston() before returning to userspace. */
	tf->tf_trapno = T_ASTFLT;

	/* If specified, set a different user stack for the child. */
	if (stack != NULL) {
#ifdef __x86_64__
		tf->tf_rsp = (uint64_t)stack + stacksize;
#else
		tf->tf_esp = (uint32_t)stack + stacksize;
#endif
	}

	l2->l_md.md_flags = l1->l_md.md_flags;
	KASSERT(l2->l_md.md_astpending == 0);

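	/*
	 * Build the switchframe immediately below the trapframe.
	 * cpu_switchto() restores the callee-saved registers from it and
	 * "returns" to sf_rip/sf_eip, i.e. lwp_trampoline(), which in
	 * turn calls func(arg) -- for a forked child, child_return(l2).
	 */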
	sf = (struct switchframe *)tf - 1;

#ifdef __x86_64__
	sf->sf_r12 = (uint64_t)func;
	sf->sf_r13 = (uint64_t)arg;
	sf->sf_rip = (uint64_t)lwp_trampoline;
	pcb2->pcb_rsp = (uint64_t)sf;
	pcb2->pcb_rbp = (uint64_t)l2;
#else
	/*
	 * XXX Is there a reason sf->sf_edi isn't initialized here?
	 * Could this leak potentially sensitive information to new
	 * userspace processes?
	 */
	sf->sf_esi = (int)func;
	sf->sf_ebx = (int)arg;
	sf->sf_eip = (int)lwp_trampoline;
	pcb2->pcb_esp = (int)sf;
	pcb2->pcb_ebp = (int)l2;
#endif
}

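/*
 * The child's kernel stack as set up above (a sketch; addresses grow
 * downwards, and the exact padding below pcb_rsp0/pcb_esp0 differs
 * between the SVS and non-SVS cases):
 *
 *	pcb_rsp0/pcb_esp0 ->	+---------------------+
 *				| struct trapframe    | copied from the parent
 *	l_md.md_regs ->		+---------------------+
 *				| struct switchframe  | consumed by cpu_switchto()
 *	pcb_rsp/pcb_esp ->	+---------------------+
 *				| free stack space    |
 */
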
/*
 * cpu_lwp_free is called from exit() to let machine-dependent
 * code free machine-dependent resources.  Note that this routine
 * must not block.  NB: this may be called with l != curlwp in
 * error paths.
 */
void
cpu_lwp_free(struct lwp *l, int proc)
{

	if (l != curlwp)
		return;

	/* Abandon the FPU state. */
	fpu_lwp_abandon(l);

	/* Abandon the dbregs state. */
	x86_dbregs_abandon(l);

#ifdef MTRR
	if (proc && l->l_proc->p_md.md_flags & MDP_USEDMTRR)
		mtrr_clean(l->l_proc);
#endif
}

/*
 * cpu_lwp_free2 is called when an LWP is being reaped.
 * This routine may block.
 */
void
cpu_lwp_free2(struct lwp *l)
{
	struct pcb *pcb;

	pcb = lwp_getpcb(l);
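	/*
	 * x86_dbregs_abandon() in cpu_lwp_free() is expected to have
	 * cleared PCB_DBREGS already; all that is left to do here is
	 * return the backing storage to the pool.
	 */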
	KASSERT((pcb->pcb_flags & PCB_DBREGS) == 0);
	if (pcb->pcb_dbregs) {
		pool_put(&x86_dbregspl, pcb->pcb_dbregs);
		pcb->pcb_dbregs = NULL;
	}
}

/*
 * Convert kernel VA to physical address
 */
paddr_t
kvtop(void *addr)
{
	paddr_t pa;
	bool ret __diagused;

	ret = pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa);
	KASSERT(ret == true);
	return pa;
}

/*
 * Map a user I/O request into kernel virtual address space.
 * Note: the pages are already locked by uvm_vslock(), so we
 * do not need to pass an access_type to pmap_enter().
 */
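/*
 * Typical caller pattern (a sketch -- physio() is the canonical user;
 * error handling omitted):
 *
 *	uvm_vslock(vs, bp->b_data, todo, rw);
 *	vmapbuf(bp, todo);
 *	...perform the raw transfer...
 *	vunmapbuf(bp, todo);
 *	uvm_vsunlock(vs, bp->b_saveaddr, todo);
 */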
int
vmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t faddr, taddr, off;
	paddr_t fpa;

	KASSERT((bp->b_flags & B_PHYS) != 0);

	bp->b_saveaddr = bp->b_data;
	faddr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - faddr;
	len = round_page(off + len);
	taddr = uvm_km_alloc(phys_map, len, 0, UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	bp->b_data = (void *)(taddr + off);
	/*
	 * The region is locked, so we expect that pmap_extract() will return
	 * true.
	 * XXX: unwise to expect this in a multithreaded environment.
	 * anything can happen to a pmap between the time we lock a
	 * region, release the pmap lock, and then relock it for
	 * the pmap_extract().
	 *
	 * no need to flush TLB since we expect nothing to be mapped
	 * where we just allocated (TLB will be flushed when our
	 * mapping is removed).
	 */
	while (len) {
		(void) pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
		    faddr, &fpa);
		pmap_kenter_pa(taddr, fpa, VM_PROT_READ|VM_PROT_WRITE, 0);
		faddr += PAGE_SIZE;
		taddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	pmap_update(pmap_kernel());

	return 0;
}

/*
 * Unmap a previously-mapped user I/O request.
 */
void
vunmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t addr, off;

	KASSERT((bp->b_flags & B_PHYS) != 0);

	addr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - addr;
	len = round_page(off + len);
	pmap_kremove(addr, len);
	pmap_update(pmap_kernel());
	uvm_km_free(phys_map, addr, len, UVM_KMF_VAONLY);
	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = 0;
}

#ifdef __HAVE_CPU_UAREA_ROUTINES
/*
 * Layout of the uarea:
 *	Page[0]        = PCB
 *	Page[1]        = RedZone
 *	Page[2]        = Stack
 *	Page[...]      = Stack
 *	Page[UPAGES-1] = Stack
 *	Page[UPAGES]   = RedZone
 * There is a redzone at the beginning of the stack, and another one at the
 * end.  The former is to protect against deep recursions that could corrupt
 * the PCB, the latter to protect against severe stack overflows.
 */
void *
cpu_uarea_alloc(bool system)
{
	vaddr_t base, va;
	paddr_t pa;

	base = uvm_km_alloc(kernel_map, USPACE + PAGE_SIZE, 0,
	    UVM_KMF_WIRED|UVM_KMF_WAITVA);
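	/*
	 * USPACE + PAGE_SIZE covers pages [0, UPAGES].  The two pages
	 * released below are left unmapped, so a stray access into a
	 * redzone faults instead of silently corrupting adjacent memory.
	 */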

	/* Page[1] = RedZone */
	va = base + PAGE_SIZE;
	if (!pmap_extract(pmap_kernel(), va, &pa)) {
		panic("%s: impossible, Page[1] unmapped", __func__);
	}
	pmap_kremove(va, PAGE_SIZE);
	uvm_pagefree(PHYS_TO_VM_PAGE(pa));

	/* Page[UPAGES] = RedZone */
	va = base + USPACE;
	if (!pmap_extract(pmap_kernel(), va, &pa)) {
		panic("%s: impossible, Page[UPAGES] unmapped", __func__);
	}
	pmap_kremove(va, PAGE_SIZE);
	uvm_pagefree(PHYS_TO_VM_PAGE(pa));

	pmap_update(pmap_kernel());

	return (void *)base;
}

bool
cpu_uarea_free(void *addr)
{
	vaddr_t base = (vaddr_t)addr;

	KASSERT(!pmap_extract(pmap_kernel(), base + PAGE_SIZE, NULL));
	KASSERT(!pmap_extract(pmap_kernel(), base + USPACE, NULL));
	uvm_km_free(kernel_map, base, USPACE + PAGE_SIZE, UVM_KMF_WIRED);
	return true;
}
#endif /* __HAVE_CPU_UAREA_ROUTINES */