10e6594a8SSascha Wildner /*-
20e6594a8SSascha Wildner * Copyright (c) 1982, 1986 The Regents of the University of California.
30e6594a8SSascha Wildner * Copyright (c) 1989, 1990 William Jolitz
40e6594a8SSascha Wildner * Copyright (c) 1994 John Dyson
50e6594a8SSascha Wildner * Copyright (c) 2008 The DragonFly Project.
60e6594a8SSascha Wildner * All rights reserved.
70e6594a8SSascha Wildner *
80e6594a8SSascha Wildner * This code is derived from software contributed to Berkeley by
90e6594a8SSascha Wildner * the Systems Programming Group of the University of Utah Computer
100e6594a8SSascha Wildner * Science Department, and William Jolitz.
110e6594a8SSascha Wildner *
120e6594a8SSascha Wildner * Redistribution and use in source and binary forms, with or without
130e6594a8SSascha Wildner * modification, are permitted provided that the following conditions
140e6594a8SSascha Wildner * are met:
150e6594a8SSascha Wildner * 1. Redistributions of source code must retain the above copyright
160e6594a8SSascha Wildner * notice, this list of conditions and the following disclaimer.
170e6594a8SSascha Wildner * 2. Redistributions in binary form must reproduce the above copyright
180e6594a8SSascha Wildner * notice, this list of conditions and the following disclaimer in the
190e6594a8SSascha Wildner * documentation and/or other materials provided with the distribution.
200e6594a8SSascha Wildner * 3. All advertising materials mentioning features or use of this software
210e6594a8SSascha Wildner * must display the following acknowledgement:
220e6594a8SSascha Wildner * This product includes software developed by the University of
230e6594a8SSascha Wildner * California, Berkeley and its contributors.
240e6594a8SSascha Wildner * 4. Neither the name of the University nor the names of its contributors
250e6594a8SSascha Wildner * may be used to endorse or promote products derived from this software
260e6594a8SSascha Wildner * without specific prior written permission.
270e6594a8SSascha Wildner *
280e6594a8SSascha Wildner * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
290e6594a8SSascha Wildner * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
300e6594a8SSascha Wildner * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
310e6594a8SSascha Wildner * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
320e6594a8SSascha Wildner * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
330e6594a8SSascha Wildner * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
340e6594a8SSascha Wildner * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
350e6594a8SSascha Wildner * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
360e6594a8SSascha Wildner * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
370e6594a8SSascha Wildner * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
380e6594a8SSascha Wildner * SUCH DAMAGE.
390e6594a8SSascha Wildner *
400e6594a8SSascha Wildner * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
410e6594a8SSascha Wildner * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
420e6594a8SSascha Wildner * $FreeBSD: src/sys/i386/i386/vm_machdep.c,v 1.132.2.9 2003/01/25 19:02:23 dillon Exp $
430e6594a8SSascha Wildner */
440e6594a8SSascha Wildner
450e6594a8SSascha Wildner #include <sys/param.h>
460e6594a8SSascha Wildner #include <sys/systm.h>
470e6594a8SSascha Wildner #include <sys/malloc.h>
480e6594a8SSascha Wildner #include <sys/proc.h>
490e6594a8SSascha Wildner #include <sys/buf.h>
500e6594a8SSascha Wildner #include <sys/interrupt.h>
510e6594a8SSascha Wildner #include <sys/vnode.h>
520e6594a8SSascha Wildner #include <sys/vmmeter.h>
530e6594a8SSascha Wildner #include <sys/kernel.h>
540e6594a8SSascha Wildner #include <sys/sysctl.h>
550e6594a8SSascha Wildner #include <sys/unistd.h>
56193c5c43SAntonio Huete Jimenez #include <sys/lwp.h>
570e6594a8SSascha Wildner
580e6594a8SSascha Wildner #include <machine/clock.h>
590e6594a8SSascha Wildner #include <machine/cpu.h>
600e6594a8SSascha Wildner #include <machine/md_var.h>
610e6594a8SSascha Wildner #include <machine/smp.h>
620e6594a8SSascha Wildner #include <machine/pcb.h>
630e6594a8SSascha Wildner #include <machine/pcb_ext.h>
640e6594a8SSascha Wildner #include <machine/segments.h>
650e6594a8SSascha Wildner #include <machine/globaldata.h> /* npxthread */
660e6594a8SSascha Wildner
670e6594a8SSascha Wildner #include <vm/vm.h>
680e6594a8SSascha Wildner #include <vm/vm_param.h>
690e6594a8SSascha Wildner #include <sys/lock.h>
700e6594a8SSascha Wildner #include <vm/vm_kern.h>
710e6594a8SSascha Wildner #include <vm/vm_page.h>
720e6594a8SSascha Wildner #include <vm/vm_map.h>
730e6594a8SSascha Wildner #include <vm/vm_extern.h>
740e6594a8SSascha Wildner
750e6594a8SSascha Wildner #include <sys/thread2.h>
760e6594a8SSascha Wildner
770e6594a8SSascha Wildner #include <bus/isa/isa.h>
780e6594a8SSascha Wildner
790e6594a8SSascha Wildner char machine[] = MACHINE;
800e6594a8SSascha Wildner SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD,
810e6594a8SSascha Wildner machine, 0, "Machine class");
820e6594a8SSascha Wildner
83ea9728caSSepherosa Ziehau u_int cpu_vendor_id = 0; /* XXX */
84ea9728caSSepherosa Ziehau
850e6594a8SSascha Wildner /*
860e6594a8SSascha Wildner * Finish a fork operation, with lwp lp2 nearly set up.
870e6594a8SSascha Wildner * Copy and update the pcb, set up the stack so that the child
880e6594a8SSascha Wildner * ready to run and return to user mode.
890e6594a8SSascha Wildner */
900e6594a8SSascha Wildner void
cpu_fork(struct lwp * lp1,struct lwp * lp2,int flags)910e6594a8SSascha Wildner cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
920e6594a8SSascha Wildner {
930e6594a8SSascha Wildner struct pcb *pcb2;
940e6594a8SSascha Wildner
950e6594a8SSascha Wildner if ((flags & RFPROC) == 0) {
960e6594a8SSascha Wildner if ((flags & RFMEM) == 0) {
970e6594a8SSascha Wildner /* unshare user LDT */
980e6594a8SSascha Wildner struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
990e6594a8SSascha Wildner struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;
1000e6594a8SSascha Wildner if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
1010e6594a8SSascha Wildner pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
1020e6594a8SSascha Wildner user_ldt_free(pcb1);
1030e6594a8SSascha Wildner pcb1->pcb_ldt = pcb_ldt;
1040e6594a8SSascha Wildner set_user_ldt(pcb1);
1050e6594a8SSascha Wildner }
1060e6594a8SSascha Wildner }
1070e6594a8SSascha Wildner return;
1080e6594a8SSascha Wildner }
1090e6594a8SSascha Wildner
1100e6594a8SSascha Wildner /* Ensure that lp1's pcb is up to date. */
1110e6594a8SSascha Wildner if (mdcpu->gd_npxthread == lp1->lwp_thread)
1120e6594a8SSascha Wildner npxsave(lp1->lwp_thread->td_savefpu);
1130e6594a8SSascha Wildner
1140e6594a8SSascha Wildner /*
1150e6594a8SSascha Wildner * Copy lp1's PCB. This really only applies to the
1160e6594a8SSascha Wildner * debug registers and FP state, but its faster to just copy the
1170e6594a8SSascha Wildner * whole thing. Because we only save the PCB at switchout time,
1180e6594a8SSascha Wildner * the register state may not be current.
1190e6594a8SSascha Wildner */
1200e6594a8SSascha Wildner pcb2 = lp2->lwp_thread->td_pcb;
1210e6594a8SSascha Wildner *pcb2 = *lp1->lwp_thread->td_pcb;
1220e6594a8SSascha Wildner
1230e6594a8SSascha Wildner /*
1240e6594a8SSascha Wildner * Create a new fresh stack for the new process.
1250e6594a8SSascha Wildner * Copy the trap frame for the return to user mode as if from a
1260e6594a8SSascha Wildner * syscall. This copies the user mode register values.
1270e6594a8SSascha Wildner *
1280e6594a8SSascha Wildner * pcb_rsp must allocate an additional call-return pointer below
1290e6594a8SSascha Wildner * the trap frame which will be restored by cpu_heavy_restore from
1300e6594a8SSascha Wildner * PCB_RIP, and the thread's td_sp pointer must allocate an
1310e6594a8SSascha Wildner * additonal two quadwords below the pcb_rsp call-return pointer to
1320e6594a8SSascha Wildner * hold the LWKT restore function pointer and rflags.
1330e6594a8SSascha Wildner *
1340e6594a8SSascha Wildner * The LWKT restore function pointer must be set to cpu_heavy_restore,
1350e6594a8SSascha Wildner * which is our standard heavy-weight process switch-in function.
1360e6594a8SSascha Wildner * YYY eventually we should shortcut fork_return and fork_trampoline
1370e6594a8SSascha Wildner * to use the LWKT restore function directly so we can get rid of
1380e6594a8SSascha Wildner * all the extra crap we are setting up.
1390e6594a8SSascha Wildner */
1400e6594a8SSascha Wildner lp2->lwp_md.md_regs = (struct trapframe *)pcb2 - 1;
1410e6594a8SSascha Wildner bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs));
1420e6594a8SSascha Wildner
1430e6594a8SSascha Wildner /*
1440e6594a8SSascha Wildner * Set registers for trampoline to user mode. Leave space for the
1450e6594a8SSascha Wildner * return address on stack. These are the kernel mode register values.
1460e6594a8SSascha Wildner */
1470e6594a8SSascha Wildner pcb2->pcb_unused01 = 0;
1480e6594a8SSascha Wildner pcb2->pcb_rbx = (unsigned long)fork_return; /* fork_trampoline argument */
1490e6594a8SSascha Wildner pcb2->pcb_rbp = 0;
1500e6594a8SSascha Wildner pcb2->pcb_rsp = (unsigned long)lp2->lwp_md.md_regs - sizeof(void *);
1510e6594a8SSascha Wildner pcb2->pcb_r12 = (unsigned long)lp2; /* fork_trampoline argument */
1520e6594a8SSascha Wildner pcb2->pcb_r13 = 0;
1530e6594a8SSascha Wildner pcb2->pcb_r14 = 0;
1540e6594a8SSascha Wildner pcb2->pcb_r15 = 0;
1550e6594a8SSascha Wildner pcb2->pcb_rip = (unsigned long)fork_trampoline;
1560e6594a8SSascha Wildner lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_rsp - sizeof(void *));
1570e6594a8SSascha Wildner *(u_int64_t *)lp2->lwp_thread->td_sp = PSL_USER;
1580e6594a8SSascha Wildner lp2->lwp_thread->td_sp -= sizeof(void *);
1590e6594a8SSascha Wildner *(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;
1600e6594a8SSascha Wildner
1610e6594a8SSascha Wildner /*
1620e6594a8SSascha Wildner * pcb2->pcb_ldt: duplicated below, if necessary.
1630e6594a8SSascha Wildner * pcb2->pcb_savefpu: cloned above.
1640e6594a8SSascha Wildner * pcb2->pcb_flags: cloned above (always 0 here?).
1650e6594a8SSascha Wildner * pcb2->pcb_onfault: cloned above (always NULL here?).
1660e6594a8SSascha Wildner */
1670e6594a8SSascha Wildner
1680e6594a8SSascha Wildner /*
1690e6594a8SSascha Wildner * XXX don't copy the i/o pages. this should probably be fixed.
1700e6594a8SSascha Wildner */
171d8061892SSascha Wildner pcb2->pcb_ext = NULL;
1720e6594a8SSascha Wildner
1730e6594a8SSascha Wildner /* Copy the LDT, if necessary. */
174d8061892SSascha Wildner if (pcb2->pcb_ldt != NULL) {
1750e6594a8SSascha Wildner if (flags & RFMEM) {
176e845e9dcSMatthew Dillon atomic_add_int(&pcb2->pcb_ldt->ldt_refcnt, 1);
1770e6594a8SSascha Wildner } else {
1780e6594a8SSascha Wildner pcb2->pcb_ldt = user_ldt_alloc(pcb2,
1790e6594a8SSascha Wildner pcb2->pcb_ldt->ldt_len);
1800e6594a8SSascha Wildner }
1810e6594a8SSascha Wildner }
1820e6594a8SSascha Wildner bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
1830e6594a8SSascha Wildner sizeof(lp2->lwp_thread->td_tls));
1840e6594a8SSascha Wildner /*
1850e6594a8SSascha Wildner * Now, cpu_switch() can schedule the new lwp.
1860e6594a8SSascha Wildner * pcb_rsp is loaded pointing to the cpu_switch() stack frame
1870e6594a8SSascha Wildner * containing the return address when exiting cpu_switch.
1880e6594a8SSascha Wildner * This will normally be to fork_trampoline(), which will have
1890e6594a8SSascha Wildner * %rbx loaded with the new lwp's pointer. fork_trampoline()
1900e6594a8SSascha Wildner * will set up a stack to call fork_return(lp, frame); to complete
1910e6594a8SSascha Wildner * the return to user-mode.
1920e6594a8SSascha Wildner */
1930e6594a8SSascha Wildner }
1940e6594a8SSascha Wildner
1950e6594a8SSascha Wildner /*
1960e6594a8SSascha Wildner * Prepare new lwp to return to the address specified in params.
1970e6594a8SSascha Wildner */
1980e6594a8SSascha Wildner int
cpu_prepare_lwp(struct lwp * lp,struct lwp_params * params)1990e6594a8SSascha Wildner cpu_prepare_lwp(struct lwp *lp, struct lwp_params *params)
2000e6594a8SSascha Wildner {
2010e6594a8SSascha Wildner struct trapframe *regs = lp->lwp_md.md_regs;
2020e6594a8SSascha Wildner void *bad_return = NULL;
2030e6594a8SSascha Wildner int error;
2040e6594a8SSascha Wildner
205b44473afSSascha Wildner regs->tf_rip = (long)params->lwp_func;
206b44473afSSascha Wildner regs->tf_rsp = (long)params->lwp_stack;
2070e6594a8SSascha Wildner /* Set up argument for function call */
208b44473afSSascha Wildner regs->tf_rdi = (long)params->lwp_arg; /* JG Can this be in userspace addresses? */
2090e6594a8SSascha Wildner /*
2100e6594a8SSascha Wildner * Set up fake return address. As the lwp function may never return,
2110e6594a8SSascha Wildner * we simply copy out a NULL pointer and force the lwp to receive
2120e6594a8SSascha Wildner * a SIGSEGV if it returns anyways.
2130e6594a8SSascha Wildner */
2140e6594a8SSascha Wildner regs->tf_rsp -= sizeof(void *);
2150e6594a8SSascha Wildner error = copyout(&bad_return, (void *)regs->tf_rsp, sizeof(bad_return));
2160e6594a8SSascha Wildner if (error)
2170e6594a8SSascha Wildner return (error);
2180e6594a8SSascha Wildner
2190e6594a8SSascha Wildner cpu_set_fork_handler(lp,
2200e6594a8SSascha Wildner (void (*)(void *, struct trapframe *))generic_lwp_return, lp);
2210e6594a8SSascha Wildner return (0);
2220e6594a8SSascha Wildner }
2230e6594a8SSascha Wildner
2240e6594a8SSascha Wildner /*
2250e6594a8SSascha Wildner * Intercept the return address from a freshly forked process that has NOT
2260e6594a8SSascha Wildner * been scheduled yet.
2270e6594a8SSascha Wildner *
2280e6594a8SSascha Wildner * This is needed to make kernel threads stay in kernel mode.
2290e6594a8SSascha Wildner */
2300e6594a8SSascha Wildner void
cpu_set_fork_handler(struct lwp * lp,void (* func)(void *,struct trapframe *),void * arg)2310e6594a8SSascha Wildner cpu_set_fork_handler(struct lwp *lp, void (*func)(void *, struct trapframe *),
2320e6594a8SSascha Wildner void *arg)
2330e6594a8SSascha Wildner {
2340e6594a8SSascha Wildner /*
2350e6594a8SSascha Wildner * Note that the trap frame follows the args, so the function
2360e6594a8SSascha Wildner * is really called like this: func(arg, frame);
2370e6594a8SSascha Wildner */
2380e6594a8SSascha Wildner lp->lwp_thread->td_pcb->pcb_rbx = (long)func; /* function */
2390e6594a8SSascha Wildner lp->lwp_thread->td_pcb->pcb_r12 = (long)arg; /* first arg */
2400e6594a8SSascha Wildner }
2410e6594a8SSascha Wildner
2420e6594a8SSascha Wildner void
cpu_set_thread_handler(thread_t td,void (* rfunc)(void),void * func,void * arg)2430e6594a8SSascha Wildner cpu_set_thread_handler(thread_t td, void (*rfunc)(void), void *func, void *arg)
2440e6594a8SSascha Wildner {
2450e6594a8SSascha Wildner td->td_pcb->pcb_rbx = (long)func;
2460e6594a8SSascha Wildner td->td_pcb->pcb_r12 = (long)arg;
2470e6594a8SSascha Wildner td->td_switch = cpu_lwkt_switch;
2480e6594a8SSascha Wildner td->td_sp -= sizeof(void *);
2490e6594a8SSascha Wildner *(void **)td->td_sp = rfunc; /* exit function on return */
2500e6594a8SSascha Wildner td->td_sp -= sizeof(void *);
2510e6594a8SSascha Wildner *(void **)td->td_sp = cpu_kthread_restore;
2520e6594a8SSascha Wildner }
2530e6594a8SSascha Wildner
2540e6594a8SSascha Wildner void
cpu_lwp_exit(void)2550e6594a8SSascha Wildner cpu_lwp_exit(void)
2560e6594a8SSascha Wildner {
2570e6594a8SSascha Wildner struct thread *td = curthread;
2580e6594a8SSascha Wildner struct pcb *pcb;
2599e6e869eSMatthew Dillon
2600e6594a8SSascha Wildner pcb = td->td_pcb;
2612883d2d8SMatthew Dillon
262*466d4f43Szrj /* Some x86 functionality was dropped */
2632883d2d8SMatthew Dillon KKASSERT(pcb->pcb_ext == NULL);
2642883d2d8SMatthew Dillon
2650e6594a8SSascha Wildner /*
2660e6594a8SSascha Wildner * disable all hardware breakpoints
2670e6594a8SSascha Wildner */
2682883d2d8SMatthew Dillon if (pcb->pcb_flags & PCB_DBREGS) {
2690e6594a8SSascha Wildner reset_dbregs();
2700e6594a8SSascha Wildner pcb->pcb_flags &= ~PCB_DBREGS;
2710e6594a8SSascha Wildner }
2720e6594a8SSascha Wildner td->td_gd->gd_cnt.v_swtch++;
2730e6594a8SSascha Wildner
2740e6594a8SSascha Wildner crit_enter_quick(td);
27574c9628eSMatthew Dillon if (td->td_flags & TDF_TSLEEPQ)
27674c9628eSMatthew Dillon tsleep_remove(td);
2770e6594a8SSascha Wildner lwkt_deschedule_self(td);
2780e6594a8SSascha Wildner lwkt_remove_tdallq(td);
2790e6594a8SSascha Wildner cpu_thread_exit();
2800e6594a8SSascha Wildner }
2810e6594a8SSascha Wildner
2820e6594a8SSascha Wildner /*
2830e6594a8SSascha Wildner * Terminate the current thread. The caller must have already acquired
2840e6594a8SSascha Wildner * the thread's rwlock and placed it on a reap list or otherwise notified
2850e6594a8SSascha Wildner * a reaper of its existance. We set a special assembly switch function which
2860e6594a8SSascha Wildner * releases td_rwlock after it has cleaned up the MMU state and switched
2870e6594a8SSascha Wildner * out the stack.
2880e6594a8SSascha Wildner *
2890e6594a8SSascha Wildner * Must be caller from a critical section and with the thread descheduled.
2900e6594a8SSascha Wildner */
2910e6594a8SSascha Wildner void
cpu_thread_exit(void)2920e6594a8SSascha Wildner cpu_thread_exit(void)
2930e6594a8SSascha Wildner {
2949e6e869eSMatthew Dillon npxexit();
2950e6594a8SSascha Wildner curthread->td_switch = cpu_exit_switch;
2960e6594a8SSascha Wildner curthread->td_flags |= TDF_EXITING;
2970e6594a8SSascha Wildner lwkt_switch();
2980e6594a8SSascha Wildner panic("cpu_thread_exit: lwkt_switch() unexpectedly returned");
2990e6594a8SSascha Wildner }
3000e6594a8SSascha Wildner
3010e6594a8SSascha Wildner /*
3020e6594a8SSascha Wildner * Used by /dev/kmem to determine if we can safely read or write
3030e6594a8SSascha Wildner * the requested KVA range. Some portions of kernel memory are
3040e6594a8SSascha Wildner * not governed by our virtual page table.
3050e6594a8SSascha Wildner */
3060e6594a8SSascha Wildner extern int64_t _end;
3070e6594a8SSascha Wildner extern void _start(void);
3080e6594a8SSascha Wildner
3090e6594a8SSascha Wildner int
kvm_access_check(vm_offset_t saddr,vm_offset_t eaddr,int prot)3100e6594a8SSascha Wildner kvm_access_check(vm_offset_t saddr, vm_offset_t eaddr, int prot)
3110e6594a8SSascha Wildner {
3120e6594a8SSascha Wildner vm_offset_t addr;
3130e6594a8SSascha Wildner
31476f1911eSMatthew Dillon if (saddr >= trunc_page((vm_offset_t)&_start) &&
31576f1911eSMatthew Dillon eaddr <= round_page((vm_offset_t)&_end)) {
3160e6594a8SSascha Wildner return 0;
31776f1911eSMatthew Dillon }
3180e6594a8SSascha Wildner if (saddr < KvaStart)
3190e6594a8SSascha Wildner return EFAULT;
3200e6594a8SSascha Wildner if (eaddr >= KvaEnd)
3210e6594a8SSascha Wildner return EFAULT;
3220e6594a8SSascha Wildner for (addr = saddr; addr < eaddr; addr += PAGE_SIZE) {
32376f1911eSMatthew Dillon if (pmap_kextract(addr) == 0)
3240e6594a8SSascha Wildner return EFAULT;
3250e6594a8SSascha Wildner }
3260e6594a8SSascha Wildner if (!kernacc((caddr_t)saddr, eaddr - saddr, prot))
3270e6594a8SSascha Wildner return EFAULT;
3280e6594a8SSascha Wildner return 0;
3290e6594a8SSascha Wildner }
330