1 /* $NetBSD: linux_machdep.c,v 1.36 2009/05/29 14:19:12 njoly Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 36 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.36 2009/05/29 14:19:12 njoly Exp $"); 37 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/systm.h> 41 #include <sys/signal.h> 42 #include <sys/exec.h> 43 #include <sys/proc.h> 44 #include <sys/ptrace.h> /* for process_read_fpregs() */ 45 #include <sys/user.h> 46 #include <sys/ucontext.h> 47 #include <sys/conf.h> 48 49 #include <machine/reg.h> 50 #include <machine/pcb.h> 51 #include <machine/fpu.h> 52 #include <machine/mcontext.h> 53 #include <machine/specialreg.h> 54 #include <machine/vmparam.h> 55 #include <machine/cpufunc.h> 56 57 /* 58 * To see whether wscons is configured (for virtual console ioctl calls). 59 */ 60 #if defined(_KERNEL_OPT) 61 #include "wsdisplay.h" 62 #endif 63 #if (NWSDISPLAY > 0) 64 #include <dev/wscons/wsconsio.h> 65 #include <dev/wscons/wsdisplay_usl_io.h> 66 #endif 67 68 69 #include <compat/linux/common/linux_signal.h> 70 #include <compat/linux/common/linux_errno.h> 71 #include <compat/linux/common/linux_exec.h> 72 #include <compat/linux/common/linux_ioctl.h> 73 #include <compat/linux/common/linux_prctl.h> 74 #include <compat/linux/common/linux_machdep.h> 75 #include <compat/linux/common/linux_ipc.h> 76 #include <compat/linux/common/linux_sem.h> 77 #include <compat/linux/linux_syscall.h> 78 #include <compat/linux/linux_syscallargs.h> 79 80 static void linux_buildcontext(struct lwp *, void *, void *); 81 82 void 83 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack) 84 { 85 struct pcb *pcb = &l->l_addr->u_pcb; 86 struct trapframe *tf; 87 88 /* If we were using the FPU, forget about it. */ 89 if (l->l_addr->u_pcb.pcb_fpcpu != NULL) 90 fpusave_lwp(l, 0); 91 92 l->l_md.md_flags &= ~MDP_USEDFPU; 93 pcb->pcb_flags = 0; 94 pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__; 95 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; 96 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; 97 pcb->pcb_fs = 0; 98 pcb->pcb_gs = 0; 99 100 l->l_proc->p_flag &= ~PK_32; 101 102 tf = l->l_md.md_regs; 103 tf->tf_rax = 0; 104 tf->tf_rbx = 0; 105 tf->tf_rcx = epp->ep_entry; 106 tf->tf_rdx = 0; 107 tf->tf_rsi = 0; 108 tf->tf_rdi = 0; 109 tf->tf_rbp = 0; 110 tf->tf_rsp = stack; 111 tf->tf_r8 = 0; 112 tf->tf_r9 = 0; 113 tf->tf_r10 = 0; 114 tf->tf_r11 = 0; 115 tf->tf_r12 = 0; 116 tf->tf_r13 = 0; 117 tf->tf_r14 = 0; 118 tf->tf_r15 = 0; 119 tf->tf_rip = epp->ep_entry; 120 tf->tf_rflags = PSL_USERSET; 121 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 122 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 123 tf->tf_ds = 0; 124 tf->tf_es = 0; 125 tf->tf_fs = 0; 126 tf->tf_gs = 0; 127 128 return; 129 } 130 131 void 132 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 133 { 134 struct lwp *l = curlwp; 135 struct proc *p = l->l_proc; 136 struct sigacts *ps = p->p_sigacts; 137 int onstack, error; 138 int sig = ksi->ksi_signo; 139 struct linux_rt_sigframe *sfp, sigframe; 140 struct linux__fpstate *fpsp, fpstate; 141 struct fpreg fpregs; 142 struct trapframe *tf = l->l_md.md_regs; 143 sig_t catcher = SIGACTION(p, sig).sa_handler; 144 linux_sigset_t lmask; 145 char *sp; 146 147 /* Do we need to jump onto the signal stack? */ 148 onstack = 149 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 150 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 151 152 /* Allocate space for the signal handler context. */ 153 if (onstack) 154 sp = ((char *)l->l_sigstk.ss_sp + 155 l->l_sigstk.ss_size); 156 else 157 sp = (char *)tf->tf_rsp - 128; 158 159 /* 160 * Save FPU state, if any 161 */ 162 if (l->l_md.md_flags & MDP_USEDFPU) { 163 sp = (char *) 164 (((long)sp - sizeof(struct linux__fpstate)) & ~0xfUL); 165 fpsp = (struct linux__fpstate *)sp; 166 } else 167 fpsp = NULL; 168 169 /* 170 * Populate the rt_sigframe 171 */ 172 sp = (char *) 173 ((((long)sp - sizeof(struct linux_rt_sigframe)) & ~0xfUL) - 8); 174 sfp = (struct linux_rt_sigframe *)sp; 175 176 memset(&sigframe, 0, sizeof(sigframe)); 177 if (ps->sa_sigdesc[sig].sd_vers != 0) 178 sigframe.pretcode = 179 (char *)(u_long)ps->sa_sigdesc[sig].sd_tramp; 180 else 181 sigframe.pretcode = NULL; 182 183 /* 184 * The user context 185 */ 186 sigframe.uc.luc_flags = 0; 187 sigframe.uc.luc_link = NULL; 188 189 /* This is used regardless of SA_ONSTACK in Linux */ 190 sigframe.uc.luc_stack.ss_sp = l->l_sigstk.ss_sp; 191 sigframe.uc.luc_stack.ss_size = l->l_sigstk.ss_size; 192 sigframe.uc.luc_stack.ss_flags = 0; 193 if (l->l_sigstk.ss_flags & SS_ONSTACK) 194 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_ONSTACK; 195 if (l->l_sigstk.ss_flags & SS_DISABLE) 196 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_DISABLE; 197 198 sigframe.uc.luc_mcontext.r8 = tf->tf_r8; 199 sigframe.uc.luc_mcontext.r9 = tf->tf_r9; 200 sigframe.uc.luc_mcontext.r10 = tf->tf_r10; 201 sigframe.uc.luc_mcontext.r11 = tf->tf_r11; 202 sigframe.uc.luc_mcontext.r12 = tf->tf_r12; 203 sigframe.uc.luc_mcontext.r13 = tf->tf_r13; 204 sigframe.uc.luc_mcontext.r14 = tf->tf_r14; 205 sigframe.uc.luc_mcontext.r15 = tf->tf_r15; 206 sigframe.uc.luc_mcontext.rdi = tf->tf_rdi; 207 sigframe.uc.luc_mcontext.rsi = tf->tf_rsi; 208 sigframe.uc.luc_mcontext.rbp = tf->tf_rbp; 209 sigframe.uc.luc_mcontext.rbx = tf->tf_rbx; 210 sigframe.uc.luc_mcontext.rdx = tf->tf_rdx; 211 sigframe.uc.luc_mcontext.rax = tf->tf_rax; 212 sigframe.uc.luc_mcontext.rcx = tf->tf_rcx; 213 sigframe.uc.luc_mcontext.rsp = tf->tf_rsp; 214 sigframe.uc.luc_mcontext.rip = tf->tf_rip; 215 sigframe.uc.luc_mcontext.eflags = tf->tf_rflags; 216 sigframe.uc.luc_mcontext.cs = tf->tf_cs; 217 sigframe.uc.luc_mcontext.gs = tf->tf_gs; 218 sigframe.uc.luc_mcontext.fs = tf->tf_fs; 219 sigframe.uc.luc_mcontext.err = tf->tf_err; 220 sigframe.uc.luc_mcontext.trapno = tf->tf_trapno; 221 native_to_linux_sigset(&lmask, mask); 222 sigframe.uc.luc_mcontext.oldmask = lmask.sig[0]; 223 sigframe.uc.luc_mcontext.cr2 = (long)l->l_addr->u_pcb.pcb_onfault; 224 sigframe.uc.luc_mcontext.fpstate = fpsp; 225 native_to_linux_sigset(&sigframe.uc.luc_sigmask, mask); 226 227 /* 228 * the siginfo structure 229 */ 230 sigframe.info.lsi_signo = native_to_linux_signo[sig]; 231 sigframe.info.lsi_errno = native_to_linux_errno[ksi->ksi_errno]; 232 sigframe.info.lsi_code = native_to_linux_si_code(ksi->ksi_code); 233 234 /* XXX This is a rought conversion, taken from i386 code */ 235 switch (sigframe.info.lsi_signo) { 236 case LINUX_SIGILL: 237 case LINUX_SIGFPE: 238 case LINUX_SIGSEGV: 239 case LINUX_SIGBUS: 240 case LINUX_SIGTRAP: 241 sigframe.info._sifields._sigfault._addr = ksi->ksi_addr; 242 break; 243 case LINUX_SIGCHLD: 244 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid; 245 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid; 246 sigframe.info._sifields._sigchld._utime = ksi->ksi_utime; 247 sigframe.info._sifields._sigchld._stime = ksi->ksi_stime; 248 sigframe.info._sifields._sigchld._status = 249 native_to_linux_si_status(ksi->ksi_code, ksi->ksi_status); 250 break; 251 case LINUX_SIGIO: 252 sigframe.info._sifields._sigpoll._band = ksi->ksi_band; 253 sigframe.info._sifields._sigpoll._fd = ksi->ksi_fd; 254 break; 255 default: 256 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid; 257 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid; 258 if ((sigframe.info.lsi_signo == LINUX_SIGALRM) || 259 (sigframe.info.lsi_signo >= LINUX_SIGRTMIN)) 260 sigframe.info._sifields._timer._sigval.sival_ptr = 261 ksi->ksi_value.sival_ptr; 262 break; 263 } 264 265 sendsig_reset(l, sig); 266 mutex_exit(p->p_lock); 267 error = 0; 268 269 /* 270 * Save FPU state, if any 271 */ 272 if (fpsp != NULL) { 273 (void)process_read_fpregs(l, &fpregs); 274 memset(&fpstate, 0, sizeof(fpstate)); 275 fpstate.cwd = fpregs.fp_fcw; 276 fpstate.swd = fpregs.fp_fsw; 277 fpstate.twd = fpregs.fp_ftw; 278 fpstate.fop = fpregs.fp_fop; 279 fpstate.rip = fpregs.fp_rip; 280 fpstate.rdp = fpregs.fp_rdp; 281 fpstate.mxcsr = fpregs.fp_mxcsr; 282 fpstate.mxcsr_mask = fpregs.fp_mxcsr_mask; 283 memcpy(&fpstate.st_space, &fpregs.fp_st, 284 sizeof(fpstate.st_space)); 285 memcpy(&fpstate.xmm_space, &fpregs.fp_xmm, 286 sizeof(fpstate.xmm_space)); 287 error = copyout(&fpstate, fpsp, sizeof(fpstate)); 288 } 289 290 if (error == 0) 291 error = copyout(&sigframe, sp, sizeof(sigframe)); 292 293 mutex_enter(p->p_lock); 294 295 if (error != 0) { 296 sigexit(l, SIGILL); 297 return; 298 } 299 300 linux_buildcontext(l, catcher, sp); 301 tf->tf_rdi = sigframe.info.lsi_signo; 302 tf->tf_rax = 0; 303 tf->tf_rsi = (long)&sfp->info; 304 tf->tf_rdx = (long)&sfp->uc; 305 306 /* 307 * Remember we use signal stack 308 */ 309 if (onstack) 310 l->l_sigstk.ss_flags |= SS_ONSTACK; 311 return; 312 } 313 314 int 315 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *v, register_t *retval) 316 { 317 printf("linux_sys_modify_ldt\n"); 318 return 0; 319 } 320 321 int 322 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *v, register_t *retval) 323 { 324 return 0; 325 } 326 327 int 328 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *v, register_t *retval) 329 { 330 return 0; 331 } 332 333 dev_t 334 linux_fakedev(dev_t dev, int raw) 335 { 336 337 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 338 const struct cdevsw *cd = cdevsw_lookup(dev); 339 340 if (raw) { 341 #if (NWSDISPLAY > 0) 342 extern const struct cdevsw wsdisplay_cdevsw; 343 if (cd == &wsdisplay_cdevsw) 344 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 345 #endif 346 } 347 348 if (cd == &ptc_cdevsw) 349 return makedev(LINUX_PTC_MAJOR, minor(dev)); 350 if (cd == &pts_cdevsw) 351 return makedev(LINUX_PTS_MAJOR, minor(dev)); 352 353 return ((minor(dev) & 0xff) | ((major(dev) & 0xfff) << 8) 354 | (((unsigned long long int) (minor(dev) & ~0xff)) << 12) 355 | (((unsigned long long int) (major(dev) & ~0xfff)) << 32)); 356 } 357 358 int 359 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *v, register_t *retval) 360 { 361 return 0; 362 } 363 364 int 365 linux_sys_rt_sigreturn(struct lwp *l, const void *v, register_t *retval) 366 { 367 struct linux_ucontext *luctx; 368 struct trapframe *tf = l->l_md.md_regs; 369 struct linux_sigcontext *lsigctx; 370 struct linux__fpstate fpstate; 371 struct linux_rt_sigframe frame, *fp; 372 ucontext_t uctx; 373 mcontext_t *mctx; 374 struct fxsave64 *fxarea; 375 int error; 376 377 fp = (struct linux_rt_sigframe *)(tf->tf_rsp - 8); 378 if ((error = copyin(fp, &frame, sizeof(frame))) != 0) { 379 mutex_enter(l->l_proc->p_lock); 380 sigexit(l, SIGILL); 381 return error; 382 } 383 luctx = &frame.uc; 384 lsigctx = &luctx->luc_mcontext; 385 386 memset(&uctx, 0, sizeof(uctx)); 387 mctx = (mcontext_t *)&uctx.uc_mcontext; 388 fxarea = (struct fxsave64 *)&mctx->__fpregs; 389 390 /* 391 * Set the flags. Linux always have CPU, stack and signal state, 392 * FPU is optional. uc_flags is not used to tell what we have. 393 */ 394 uctx.uc_flags = (_UC_SIGMASK|_UC_CPU|_UC_STACK|_UC_CLRSTACK); 395 if (lsigctx->fpstate != NULL) 396 uctx.uc_flags |= _UC_FPU; 397 uctx.uc_link = NULL; 398 399 /* 400 * Signal set 401 */ 402 linux_to_native_sigset(&uctx.uc_sigmask, &luctx->luc_sigmask); 403 404 /* 405 * CPU state 406 */ 407 mctx->__gregs[_REG_R8] = lsigctx->r8; 408 mctx->__gregs[_REG_R9] = lsigctx->r9; 409 mctx->__gregs[_REG_R10] = lsigctx->r10; 410 mctx->__gregs[_REG_R11] = lsigctx->r11; 411 mctx->__gregs[_REG_R12] = lsigctx->r12; 412 mctx->__gregs[_REG_R13] = lsigctx->r13; 413 mctx->__gregs[_REG_R14] = lsigctx->r14; 414 mctx->__gregs[_REG_R15] = lsigctx->r15; 415 mctx->__gregs[_REG_RDI] = lsigctx->rdi; 416 mctx->__gregs[_REG_RSI] = lsigctx->rsi; 417 mctx->__gregs[_REG_RBP] = lsigctx->rbp; 418 mctx->__gregs[_REG_RBX] = lsigctx->rbx; 419 mctx->__gregs[_REG_RAX] = lsigctx->rax; 420 mctx->__gregs[_REG_RDX] = lsigctx->rdx; 421 mctx->__gregs[_REG_RCX] = lsigctx->rcx; 422 mctx->__gregs[_REG_RIP] = lsigctx->rip; 423 mctx->__gregs[_REG_RFLAGS] = lsigctx->eflags; 424 mctx->__gregs[_REG_CS] = lsigctx->cs; 425 mctx->__gregs[_REG_GS] = lsigctx->gs; 426 mctx->__gregs[_REG_FS] = lsigctx->fs; 427 mctx->__gregs[_REG_ERR] = lsigctx->err; 428 mctx->__gregs[_REG_TRAPNO] = lsigctx->trapno; 429 mctx->__gregs[_REG_ES] = tf->tf_es; 430 mctx->__gregs[_REG_DS] = tf->tf_ds; 431 mctx->__gregs[_REG_RSP] = lsigctx->rsp; /* XXX */ 432 mctx->__gregs[_REG_SS] = tf->tf_ss; 433 434 /* 435 * FPU state 436 */ 437 if (lsigctx->fpstate != NULL) { 438 error = copyin(lsigctx->fpstate, &fpstate, sizeof(fpstate)); 439 if (error != 0) { 440 mutex_enter(l->l_proc->p_lock); 441 sigexit(l, SIGILL); 442 return error; 443 } 444 445 fxarea->fx_fcw = fpstate.cwd; 446 fxarea->fx_fsw = fpstate.swd; 447 fxarea->fx_ftw = fpstate.twd; 448 fxarea->fx_fop = fpstate.fop; 449 fxarea->fx_rip = fpstate.rip; 450 fxarea->fx_rdp = fpstate.rdp; 451 fxarea->fx_mxcsr = fpstate.mxcsr; 452 fxarea->fx_mxcsr_mask = fpstate.mxcsr_mask; 453 memcpy(&fxarea->fx_st, &fpstate.st_space, 454 sizeof(fxarea->fx_st)); 455 memcpy(&fxarea->fx_xmm, &fpstate.xmm_space, 456 sizeof(fxarea->fx_xmm)); 457 } 458 459 /* 460 * And the stack 461 */ 462 uctx.uc_stack.ss_flags = 0; 463 if (luctx->luc_stack.ss_flags & LINUX_SS_ONSTACK) 464 uctx.uc_stack.ss_flags |= SS_ONSTACK; 465 466 if (luctx->luc_stack.ss_flags & LINUX_SS_DISABLE) 467 uctx.uc_stack.ss_flags |= SS_DISABLE; 468 469 uctx.uc_stack.ss_sp = luctx->luc_stack.ss_sp; 470 uctx.uc_stack.ss_size = luctx->luc_stack.ss_size; 471 472 /* 473 * And let setucontext deal with that. 474 */ 475 mutex_enter(l->l_proc->p_lock); 476 error = setucontext(l, &uctx); 477 mutex_exit(l->l_proc->p_lock); 478 if (error) 479 return error; 480 481 return EJUSTRETURN; 482 } 483 484 int 485 linux_sys_arch_prctl(struct lwp *l, const struct linux_sys_arch_prctl_args *uap, register_t *retval) 486 { 487 /* { 488 syscallarg(int) code; 489 syscallarg(unsigned long) addr; 490 } */ 491 struct pcb *pcb = &l->l_addr->u_pcb; 492 struct trapframe *tf = l->l_md.md_regs; 493 int error; 494 uint64_t taddr; 495 496 switch(SCARG(uap, code)) { 497 case LINUX_ARCH_SET_GS: 498 taddr = SCARG(uap, addr); 499 if (taddr >= VM_MAXUSER_ADDRESS) 500 return EINVAL; 501 pcb->pcb_gs = taddr; 502 pcb->pcb_flags |= PCB_GS64; 503 if (l == curlwp) 504 wrmsr(MSR_KERNELGSBASE, taddr); 505 break; 506 507 case LINUX_ARCH_GET_GS: 508 if (pcb->pcb_flags & PCB_GS64) 509 taddr = pcb->pcb_gs; 510 else { 511 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr); 512 if (error != 0) 513 return error; 514 } 515 error = copyout(&taddr, (char *)SCARG(uap, addr), 8); 516 if (error != 0) 517 return error; 518 break; 519 520 case LINUX_ARCH_SET_FS: 521 taddr = SCARG(uap, addr); 522 if (taddr >= VM_MAXUSER_ADDRESS) 523 return EINVAL; 524 pcb->pcb_fs = taddr; 525 pcb->pcb_flags |= PCB_FS64; 526 if (l == curlwp) 527 wrmsr(MSR_FSBASE, taddr); 528 break; 529 530 case LINUX_ARCH_GET_FS: 531 if (pcb->pcb_flags & PCB_FS64) 532 taddr = pcb->pcb_fs; 533 else { 534 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr); 535 if (error != 0) 536 return error; 537 } 538 error = copyout(&taddr, (char *)SCARG(uap, addr), 8); 539 if (error != 0) 540 return error; 541 break; 542 543 default: 544 #ifdef DEBUG_LINUX 545 printf("linux_sys_arch_prctl: unexpected code %d\n", 546 SCARG(uap, code)); 547 #endif 548 return EINVAL; 549 } 550 551 return 0; 552 } 553 554 const int linux_vsyscall_to_syscall[] = { 555 LINUX_SYS_gettimeofday, 556 LINUX_SYS_time, 557 LINUX_SYS_nosys, /* nosys */ 558 LINUX_SYS_nosys, /* nosys */ 559 }; 560 561 int 562 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg) 563 { 564 struct trapframe *tf = arg; 565 uint64_t retaddr; 566 int vsyscallnr; 567 568 /* 569 * Check for a vsyscall. %rip must be the fault address, 570 * and the address must be in the Linux vsyscall area. 571 * Also, vsyscalls are only done at 1024-byte boundaries. 572 */ 573 574 if (__predict_true(trapaddr < LINUX_VSYSCALL_START)) 575 return 0; 576 577 if (trapaddr != tf->tf_rip) 578 return 0; 579 580 if ((tf->tf_rip & (LINUX_VSYSCALL_SIZE - 1)) != 0) 581 return 0; 582 583 vsyscallnr = (tf->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SIZE; 584 585 if (vsyscallnr > LINUX_VSYSCALL_MAXNR) 586 return 0; 587 588 /* 589 * Get the return address from the top of the stack, 590 * and fix up the return address. 591 * This assumes the faulting instruction was callq *reg, 592 * which is the only way that vsyscalls are ever entered. 593 */ 594 if (copyin((void *)tf->tf_rsp, &retaddr, sizeof retaddr) != 0) 595 return 0; 596 tf->tf_rip = retaddr; 597 tf->tf_rax = linux_vsyscall_to_syscall[vsyscallnr]; 598 tf->tf_rsp += 8; /* "pop" the return address */ 599 600 #if 0 601 printf("usertrap: rip %p rsp %p retaddr %p vsys %d sys %d\n", 602 (void *)tf->tf_rip, (void *)tf->tf_rsp, (void *)retaddr, 603 vsyscallnr, (int)tf->tf_rax); 604 #endif 605 606 (*l->l_proc->p_md.md_syscall)(tf); 607 608 return 1; 609 } 610 611 static void 612 linux_buildcontext(struct lwp *l, void *catcher, void *f) 613 { 614 struct trapframe *tf = l->l_md.md_regs; 615 616 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 617 tf->tf_rip = (u_int64_t)catcher; 618 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 619 tf->tf_rflags &= ~PSL_CLEARSIG; 620 tf->tf_rsp = (u_int64_t)f; 621 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 622 } 623 624 void * 625 linux_get_newtls(struct lwp *l) 626 { 627 struct trapframe *tf = l->l_md.md_regs; 628 629 return (void *)tf->tf_r8; 630 } 631 632 int 633 linux_set_newtls(struct lwp *l, void *tls) 634 { 635 struct linux_sys_arch_prctl_args cup; 636 register_t retval; 637 638 SCARG(&cup, code) = LINUX_ARCH_SET_FS; 639 SCARG(&cup, addr) = (unsigned long)tls; 640 641 return linux_sys_arch_prctl(l, &cup, &retval); 642 } 643