/*	$NetBSD: vm_machdep.c,v 1.46 2023/10/06 11:53:27 skrll Exp $	*/

/*-
 * Copyright (c) 1982, 1986 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*-
 * Copyright (c) 1995 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1989, 1990 William Jolitz
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
 */

/*
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm_machdep.c,v 1.46 2023/10/06 11:53:27 skrll Exp $");

#include "opt_mtrr.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/core.h>
#include <sys/exec.h>
#include <sys/ptrace.h>

#include <uvm/uvm.h>

#include <machine/cpu.h>
#include <machine/gdt.h>
#include <machine/reg.h>
#include <machine/specialreg.h>

#ifdef MTRR
#include <machine/mtrr.h>
#endif

#include <x86/fpu.h>
#include <x86/dbregs.h>

extern struct pool x86_dbregspl;

void
cpu_proc_fork(struct proc *p1, struct proc *p2)
{

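	/* Inherit the machine-dependent process flags from the parent. */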
	p2->p_md.md_flags = p1->p_md.md_flags;
}

/*
 * cpu_lwp_fork: finish a new LWP (l2) operation.
 *
 * First LWP (l1) is the process being forked.  If it is &lwp0, then we
 * are creating a kthread, where return path and argument are specified
 * with `func' and `arg'.
 *
 * If an alternate user-level stack is requested (with non-zero values
 * in both the stack and stacksize arguments), then set up the user stack
 * pointer accordingly.
 */
void
cpu_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize,
    void (*func)(void *), void *arg)
{
	struct pcb *pcb1, *pcb2;
	struct trapframe *tf;
	struct switchframe *sf;
	vaddr_t uv;

	KASSERT(l1 == curlwp || l1 == &lwp0);

	pcb1 = lwp_getpcb(l1);
	pcb2 = lwp_getpcb(l2);

	/* Copy the PCB from parent, except the FPU state. */
	memcpy(pcb2, pcb1, offsetof(struct pcb, pcb_savefpu));

	/* Fork the FPU state. */
	fpu_lwp_fork(l1, l2);

	/* Never inherit CPU Debug Registers */
	pcb2->pcb_dbregs = NULL;
	pcb2->pcb_flags &= ~PCB_DBREGS;

#if defined(XENPV)
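	/* Xen PV: give the child the default (kernel) I/O privilege level. */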
	pcb2->pcb_iopl = IOPL_KPL;
#endif

	/*
	 * Set the kernel stack address (derived from the uarea address) and
	 * trapframe address for child.
	 *
	 * Rig kernel stack so that it would start out in lwp_trampoline()
	 * and call child_return() with l2 as an argument.  This causes the
	 * newly-created child process to go directly to user level with a
	 * parent return value of 0 from fork(), while the parent process
	 * returns normally.
	 */
	uv = uvm_lwp_getuarea(l2);
	KASSERT(uv % PAGE_SIZE == 0);

#ifdef __x86_64__
#ifdef SVS
	pcb2->pcb_rsp0 = (uv + USPACE - PAGE_SIZE +
	    sizeof(struct trapframe));
	KASSERT((pcb2->pcb_rsp0 & 0xF) == 0);
#else
	pcb2->pcb_rsp0 = (uv + USPACE - 16);
#endif
	tf = (struct trapframe *)pcb2->pcb_rsp0 - 1;
#else
	pcb2->pcb_esp0 = (uv + USPACE - 16);
	tf = (struct trapframe *)pcb2->pcb_esp0 - 1;

	pcb2->pcb_iomap = NULL;
#endif
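	/* Remember the trapframe; md_regs is how MD code finds the user register state. */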
	l2->l_md.md_regs = tf;

	/*
	 * Copy the trapframe from the parent, so that the return to
	 * userspace goes to the right address, with the correct registers.
	 */
	memcpy(tf, l1->l_md.md_regs, sizeof(struct trapframe));

	/* Child LWP might get aston() before returning to userspace. */
	tf->tf_trapno = T_ASTFLT;

	/* If specified, set a different user stack for the child. */
	if (stack != NULL) {
#ifdef __x86_64__
		tf->tf_rsp = (uint64_t)stack + stacksize;
#else
		tf->tf_esp = (uint32_t)stack + stacksize;
#endif
	}

	l2->l_md.md_flags = l1->l_md.md_flags;
	KASSERT(l2->l_md.md_astpending == 0);

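	/*
	 * Build the switch frame just below the trapframe.  cpu_switchto()
	 * resumes the child in lwp_trampoline(), which then calls func(arg)
	 * (child_return() for a forked user process).
	 */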
	sf = (struct switchframe *)tf - 1;

#ifdef __x86_64__
	sf->sf_r12 = (uint64_t)func;
	sf->sf_r13 = (uint64_t)arg;
	sf->sf_rip = (uint64_t)lwp_trampoline;
	pcb2->pcb_rsp = (uint64_t)sf;
	pcb2->pcb_rbp = (uint64_t)l2;
#else
	/*
	 * XXX Is there a reason sf->sf_edi isn't initialized here?
	 * Could this leak potentially sensitive information to new
	 * userspace processes?
	 */
	sf->sf_esi = (int)func;
	sf->sf_ebx = (int)arg;
	sf->sf_eip = (int)lwp_trampoline;
	pcb2->pcb_esp = (int)sf;
	pcb2->pcb_ebp = (int)l2;
#endif
}

/*
 * cpu_lwp_free is called from exit() to let machine-dependent
 * code free machine-dependent resources.  Note that this routine
 * must not block.  NB: this may be called with l != curlwp in
 * error paths.
 */
void
cpu_lwp_free(struct lwp *l, int proc)
{

	if (l != curlwp)
		return;

	/* Abandon the FPU state. */
	fpu_lwp_abandon(l);

	/* Abandon the dbregs state. */
	x86_dbregs_abandon(l);

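	/* If the whole process is exiting and it used MTRRs, clean them up. */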
#ifdef MTRR
	if (proc && l->l_proc->p_md.md_flags & MDP_USEDMTRR)
		mtrr_clean(l->l_proc);
#endif
}

/*
 * cpu_lwp_free2 is called when an LWP is being reaped.
 * This routine may block.
 */
void
cpu_lwp_free2(struct lwp *l)
{
	struct pcb *pcb;

	pcb = lwp_getpcb(l);
	KASSERT((pcb->pcb_flags & PCB_DBREGS) == 0);
	if (pcb->pcb_dbregs) {
		pool_put(&x86_dbregspl, pcb->pcb_dbregs);
		pcb->pcb_dbregs = NULL;
	}
}

/*
 * Convert kernel VA to physical address
 */
paddr_t
kvtop(void *addr)
{
	paddr_t pa;
	bool ret __diagused;

	ret = pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa);
	KASSERT(ret == true);
	return pa;
}

/*
 * Map a user I/O request into kernel virtual address space.
 * Note: the pages are already locked by uvm_vslock(), so we
 * do not need to pass an access_type to pmap_enter().
 */
int
vmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t faddr, taddr, off;
	paddr_t fpa;

	KASSERT((bp->b_flags & B_PHYS) != 0);

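	/* Save the original user buffer address; vunmapbuf() restores it. */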
	bp->b_saveaddr = bp->b_data;
	faddr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - faddr;
	len = round_page(off + len);
	taddr = uvm_km_alloc(phys_map, len, 0, UVM_KMF_VAONLY | UVM_KMF_WAITVA);
	bp->b_data = (void *)(taddr + off);
	/*
	 * The region is locked, so we expect that pmap_extract() will return
	 * true.
	 * XXX: unwise to expect this in a multithreaded environment.
	 * Anything can happen to a pmap between the time we lock a
	 * region, release the pmap lock, and then relock it for
	 * the pmap_extract().
	 *
	 * No need to flush the TLB since we expect nothing to be mapped
	 * where we just allocated (the TLB will be flushed when our
	 * mapping is removed).
	 */
	while (len) {
		(void) pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
		    faddr, &fpa);
		pmap_kenter_pa(taddr, fpa, VM_PROT_READ|VM_PROT_WRITE, 0);
		faddr += PAGE_SIZE;
		taddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	pmap_update(pmap_kernel());

	return 0;
}

/*
 * Unmap a previously-mapped user I/O request.
 */
void
vunmapbuf(struct buf *bp, vsize_t len)
{
	vaddr_t addr, off;

	KASSERT((bp->b_flags & B_PHYS) != 0);

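	/* Recover the page-aligned range that vmapbuf() mapped, then tear it down. */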
	addr = trunc_page((vaddr_t)bp->b_data);
	off = (vaddr_t)bp->b_data - addr;
	len = round_page(off + len);
	pmap_kremove(addr, len);
	pmap_update(pmap_kernel());
	uvm_km_free(phys_map, addr, len, UVM_KMF_VAONLY);
	bp->b_data = bp->b_saveaddr;
	bp->b_saveaddr = 0;
}

#ifdef __HAVE_CPU_UAREA_ROUTINES
/*
 * Layout of the uarea:
 *    Page[0]        = PCB
 *    Page[1]        = RedZone
 *    Page[2]        = Stack
 *    Page[...]      = Stack
 *    Page[UPAGES-1] = Stack
 *    Page[UPAGES]   = RedZone
 * There is a redzone at the beginning of the stack, and another one at the
 * end. The former is to protect against deep recursions that could corrupt
 * the PCB, the latter to protect against severe stack overflows.
 */
void *
cpu_uarea_alloc(bool system)
{
	vaddr_t base, va;
	paddr_t pa;

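	/*
	 * Allocate UPAGES + 1 wired pages.  The physical pages backing the
	 * two red zones are freed below and their VAs left unmapped, so any
	 * access to them faults.
	 */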
	base = uvm_km_alloc(kernel_map, USPACE + PAGE_SIZE, 0,
	    UVM_KMF_WIRED|UVM_KMF_WAITVA);

	/* Page[1] = RedZone */
	va = base + PAGE_SIZE;
	if (!pmap_extract(pmap_kernel(), va, &pa)) {
		panic("%s: impossible, Page[1] unmapped", __func__);
	}
	pmap_kremove(va, PAGE_SIZE);
	uvm_pagefree(PHYS_TO_VM_PAGE(pa));

	/* Page[UPAGES] = RedZone */
	va = base + USPACE;
	if (!pmap_extract(pmap_kernel(), va, &pa)) {
		panic("%s: impossible, Page[UPAGES] unmapped", __func__);
	}
	pmap_kremove(va, PAGE_SIZE);
	uvm_pagefree(PHYS_TO_VM_PAGE(pa));

	pmap_update(pmap_kernel());

	return (void *)base;
}

bool
cpu_uarea_free(void *addr)
{
	vaddr_t base = (vaddr_t)addr;

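	/* The two red-zone pages must still be unmapped. */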
	KASSERT(!pmap_extract(pmap_kernel(), base + PAGE_SIZE, NULL));
	KASSERT(!pmap_extract(pmap_kernel(), base + USPACE, NULL));
	uvm_km_free(kernel_map, base, USPACE + PAGE_SIZE, UVM_KMF_WIRED);
	return true;
}
#endif /* __HAVE_CPU_UAREA_ROUTINES */
405