1 /* $NetBSD: linux_machdep.c,v 1.35 2009/03/18 16:00:16 cegger Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 36 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.35 2009/03/18 16:00:16 cegger Exp $"); 37 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/systm.h> 41 #include <sys/signal.h> 42 #include <sys/exec.h> 43 #include <sys/proc.h> 44 #include <sys/ptrace.h> /* for process_read_fpregs() */ 45 #include <sys/user.h> 46 #include <sys/wait.h> 47 #include <sys/ucontext.h> 48 #include <sys/conf.h> 49 50 #include <machine/reg.h> 51 #include <machine/pcb.h> 52 #include <machine/fpu.h> 53 #include <machine/mcontext.h> 54 #include <machine/specialreg.h> 55 #include <machine/vmparam.h> 56 #include <machine/cpufunc.h> 57 58 /* 59 * To see whether wscons is configured (for virtual console ioctl calls). 60 */ 61 #if defined(_KERNEL_OPT) 62 #include "wsdisplay.h" 63 #endif 64 #if (NWSDISPLAY > 0) 65 #include <dev/wscons/wsconsio.h> 66 #include <dev/wscons/wsdisplay_usl_io.h> 67 #endif 68 69 70 #include <compat/linux/common/linux_signal.h> 71 #include <compat/linux/common/linux_errno.h> 72 #include <compat/linux/common/linux_exec.h> 73 #include <compat/linux/common/linux_ioctl.h> 74 #include <compat/linux/common/linux_prctl.h> 75 #include <compat/linux/common/linux_machdep.h> 76 #include <compat/linux/common/linux_ipc.h> 77 #include <compat/linux/common/linux_sem.h> 78 #include <compat/linux/linux_syscall.h> 79 #include <compat/linux/linux_syscallargs.h> 80 81 static void linux_buildcontext(struct lwp *, void *, void *); 82 83 void 84 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack) 85 { 86 struct pcb *pcb = &l->l_addr->u_pcb; 87 struct trapframe *tf; 88 89 /* If we were using the FPU, forget about it. */ 90 if (l->l_addr->u_pcb.pcb_fpcpu != NULL) 91 fpusave_lwp(l, 0); 92 93 l->l_md.md_flags &= ~MDP_USEDFPU; 94 pcb->pcb_flags = 0; 95 pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__; 96 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; 97 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; 98 pcb->pcb_fs = 0; 99 pcb->pcb_gs = 0; 100 101 l->l_proc->p_flag &= ~PK_32; 102 103 tf = l->l_md.md_regs; 104 tf->tf_rax = 0; 105 tf->tf_rbx = 0; 106 tf->tf_rcx = epp->ep_entry; 107 tf->tf_rdx = 0; 108 tf->tf_rsi = 0; 109 tf->tf_rdi = 0; 110 tf->tf_rbp = 0; 111 tf->tf_rsp = stack; 112 tf->tf_r8 = 0; 113 tf->tf_r9 = 0; 114 tf->tf_r10 = 0; 115 tf->tf_r11 = 0; 116 tf->tf_r12 = 0; 117 tf->tf_r13 = 0; 118 tf->tf_r14 = 0; 119 tf->tf_r15 = 0; 120 tf->tf_rip = epp->ep_entry; 121 tf->tf_rflags = PSL_USERSET; 122 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 123 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 124 tf->tf_ds = 0; 125 tf->tf_es = 0; 126 tf->tf_fs = 0; 127 tf->tf_gs = 0; 128 129 return; 130 } 131 132 void 133 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 134 { 135 struct lwp *l = curlwp; 136 struct proc *p = l->l_proc; 137 struct sigacts *ps = p->p_sigacts; 138 int onstack, error; 139 int sig = ksi->ksi_signo; 140 struct linux_rt_sigframe *sfp, sigframe; 141 struct linux__fpstate *fpsp, fpstate; 142 struct fpreg fpregs; 143 struct trapframe *tf = l->l_md.md_regs; 144 sig_t catcher = SIGACTION(p, sig).sa_handler; 145 linux_sigset_t lmask; 146 char *sp; 147 148 /* Do we need to jump onto the signal stack? */ 149 onstack = 150 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 151 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 152 153 /* Allocate space for the signal handler context. */ 154 if (onstack) 155 sp = ((char *)l->l_sigstk.ss_sp + 156 l->l_sigstk.ss_size); 157 else 158 sp = (char *)tf->tf_rsp - 128; 159 160 /* 161 * Save FPU state, if any 162 */ 163 if (l->l_md.md_flags & MDP_USEDFPU) { 164 sp = (char *) 165 (((long)sp - sizeof(struct linux__fpstate)) & ~0xfUL); 166 fpsp = (struct linux__fpstate *)sp; 167 } else 168 fpsp = NULL; 169 170 /* 171 * Populate the rt_sigframe 172 */ 173 sp = (char *) 174 ((((long)sp - sizeof(struct linux_rt_sigframe)) & ~0xfUL) - 8); 175 sfp = (struct linux_rt_sigframe *)sp; 176 177 memset(&sigframe, 0, sizeof(sigframe)); 178 if (ps->sa_sigdesc[sig].sd_vers != 0) 179 sigframe.pretcode = 180 (char *)(u_long)ps->sa_sigdesc[sig].sd_tramp; 181 else 182 sigframe.pretcode = NULL; 183 184 /* 185 * The user context 186 */ 187 sigframe.uc.luc_flags = 0; 188 sigframe.uc.luc_link = NULL; 189 190 /* This is used regardless of SA_ONSTACK in Linux */ 191 sigframe.uc.luc_stack.ss_sp = l->l_sigstk.ss_sp; 192 sigframe.uc.luc_stack.ss_size = l->l_sigstk.ss_size; 193 sigframe.uc.luc_stack.ss_flags = 0; 194 if (l->l_sigstk.ss_flags & SS_ONSTACK) 195 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_ONSTACK; 196 if (l->l_sigstk.ss_flags & SS_DISABLE) 197 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_DISABLE; 198 199 sigframe.uc.luc_mcontext.r8 = tf->tf_r8; 200 sigframe.uc.luc_mcontext.r9 = tf->tf_r9; 201 sigframe.uc.luc_mcontext.r10 = tf->tf_r10; 202 sigframe.uc.luc_mcontext.r11 = tf->tf_r11; 203 sigframe.uc.luc_mcontext.r12 = tf->tf_r12; 204 sigframe.uc.luc_mcontext.r13 = tf->tf_r13; 205 sigframe.uc.luc_mcontext.r14 = tf->tf_r14; 206 sigframe.uc.luc_mcontext.r15 = tf->tf_r15; 207 sigframe.uc.luc_mcontext.rdi = tf->tf_rdi; 208 sigframe.uc.luc_mcontext.rsi = tf->tf_rsi; 209 sigframe.uc.luc_mcontext.rbp = tf->tf_rbp; 210 sigframe.uc.luc_mcontext.rbx = tf->tf_rbx; 211 sigframe.uc.luc_mcontext.rdx = tf->tf_rdx; 212 sigframe.uc.luc_mcontext.rax = tf->tf_rax; 213 sigframe.uc.luc_mcontext.rcx = tf->tf_rcx; 214 sigframe.uc.luc_mcontext.rsp = tf->tf_rsp; 215 sigframe.uc.luc_mcontext.rip = tf->tf_rip; 216 sigframe.uc.luc_mcontext.eflags = tf->tf_rflags; 217 sigframe.uc.luc_mcontext.cs = tf->tf_cs; 218 sigframe.uc.luc_mcontext.gs = tf->tf_gs; 219 sigframe.uc.luc_mcontext.fs = tf->tf_fs; 220 sigframe.uc.luc_mcontext.err = tf->tf_err; 221 sigframe.uc.luc_mcontext.trapno = tf->tf_trapno; 222 native_to_linux_sigset(&lmask, mask); 223 sigframe.uc.luc_mcontext.oldmask = lmask.sig[0]; 224 sigframe.uc.luc_mcontext.cr2 = (long)l->l_addr->u_pcb.pcb_onfault; 225 sigframe.uc.luc_mcontext.fpstate = fpsp; 226 native_to_linux_sigset(&sigframe.uc.luc_sigmask, mask); 227 228 /* 229 * the siginfo structure 230 */ 231 sigframe.info.lsi_signo = native_to_linux_signo[sig]; 232 sigframe.info.lsi_errno = native_to_linux_errno[ksi->ksi_errno]; 233 sigframe.info.lsi_code = native_to_linux_si_code(ksi->ksi_code); 234 235 /* XXX This is a rought conversion, taken from i386 code */ 236 switch (sigframe.info.lsi_signo) { 237 case LINUX_SIGILL: 238 case LINUX_SIGFPE: 239 case LINUX_SIGSEGV: 240 case LINUX_SIGBUS: 241 case LINUX_SIGTRAP: 242 sigframe.info._sifields._sigfault._addr = ksi->ksi_addr; 243 break; 244 case LINUX_SIGCHLD: 245 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid; 246 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid; 247 sigframe.info._sifields._sigchld._utime = ksi->ksi_utime; 248 sigframe.info._sifields._sigchld._stime = ksi->ksi_stime; 249 250 if (WCOREDUMP(ksi->ksi_status)) { 251 sigframe.info.lsi_code = LINUX_CLD_DUMPED; 252 sigframe.info._sifields._sigchld._status = 253 _WSTATUS(ksi->ksi_status); 254 } else if (_WSTATUS(ksi->ksi_status)) { 255 sigframe.info.lsi_code = LINUX_CLD_KILLED; 256 sigframe.info._sifields._sigchld._status = 257 _WSTATUS(ksi->ksi_status); 258 } else { 259 sigframe.info.lsi_code = LINUX_CLD_EXITED; 260 sigframe.info._sifields._sigchld._status = 261 ((ksi->ksi_status & 0xff00U) >> 8); 262 } 263 break; 264 case LINUX_SIGIO: 265 sigframe.info._sifields._sigpoll._band = ksi->ksi_band; 266 sigframe.info._sifields._sigpoll._fd = ksi->ksi_fd; 267 break; 268 default: 269 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid; 270 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid; 271 if ((sigframe.info.lsi_signo == LINUX_SIGALRM) || 272 (sigframe.info.lsi_signo >= LINUX_SIGRTMIN)) 273 sigframe.info._sifields._timer._sigval.sival_ptr = 274 ksi->ksi_value.sival_ptr; 275 break; 276 } 277 278 sendsig_reset(l, sig); 279 mutex_exit(p->p_lock); 280 error = 0; 281 282 /* 283 * Save FPU state, if any 284 */ 285 if (fpsp != NULL) { 286 (void)process_read_fpregs(l, &fpregs); 287 memset(&fpstate, 0, sizeof(fpstate)); 288 fpstate.cwd = fpregs.fp_fcw; 289 fpstate.swd = fpregs.fp_fsw; 290 fpstate.twd = fpregs.fp_ftw; 291 fpstate.fop = fpregs.fp_fop; 292 fpstate.rip = fpregs.fp_rip; 293 fpstate.rdp = fpregs.fp_rdp; 294 fpstate.mxcsr = fpregs.fp_mxcsr; 295 fpstate.mxcsr_mask = fpregs.fp_mxcsr_mask; 296 memcpy(&fpstate.st_space, &fpregs.fp_st, 297 sizeof(fpstate.st_space)); 298 memcpy(&fpstate.xmm_space, &fpregs.fp_xmm, 299 sizeof(fpstate.xmm_space)); 300 error = copyout(&fpstate, fpsp, sizeof(fpstate)); 301 } 302 303 if (error == 0) 304 error = copyout(&sigframe, sp, sizeof(sigframe)); 305 306 mutex_enter(p->p_lock); 307 308 if (error != 0) { 309 sigexit(l, SIGILL); 310 return; 311 } 312 313 linux_buildcontext(l, catcher, sp); 314 tf->tf_rdi = sigframe.info.lsi_signo; 315 tf->tf_rax = 0; 316 tf->tf_rsi = (long)&sfp->info; 317 tf->tf_rdx = (long)&sfp->uc; 318 319 /* 320 * Remember we use signal stack 321 */ 322 if (onstack) 323 l->l_sigstk.ss_flags |= SS_ONSTACK; 324 return; 325 } 326 327 int 328 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *v, register_t *retval) 329 { 330 printf("linux_sys_modify_ldt\n"); 331 return 0; 332 } 333 334 int 335 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *v, register_t *retval) 336 { 337 return 0; 338 } 339 340 int 341 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *v, register_t *retval) 342 { 343 return 0; 344 } 345 346 dev_t 347 linux_fakedev(dev_t dev, int raw) 348 { 349 350 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 351 const struct cdevsw *cd = cdevsw_lookup(dev); 352 353 if (raw) { 354 #if (NWSDISPLAY > 0) 355 extern const struct cdevsw wsdisplay_cdevsw; 356 if (cd == &wsdisplay_cdevsw) 357 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 358 #endif 359 } 360 361 if (cd == &ptc_cdevsw) 362 return makedev(LINUX_PTC_MAJOR, minor(dev)); 363 if (cd == &pts_cdevsw) 364 return makedev(LINUX_PTS_MAJOR, minor(dev)); 365 366 return ((minor(dev) & 0xff) | ((major(dev) & 0xfff) << 8) 367 | (((unsigned long long int) (minor(dev) & ~0xff)) << 12) 368 | (((unsigned long long int) (major(dev) & ~0xfff)) << 32)); 369 } 370 371 int 372 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *v, register_t *retval) 373 { 374 return 0; 375 } 376 377 int 378 linux_sys_rt_sigreturn(struct lwp *l, const void *v, register_t *retval) 379 { 380 struct linux_ucontext *luctx; 381 struct trapframe *tf = l->l_md.md_regs; 382 struct linux_sigcontext *lsigctx; 383 struct linux__fpstate fpstate; 384 struct linux_rt_sigframe frame, *fp; 385 ucontext_t uctx; 386 mcontext_t *mctx; 387 struct fxsave64 *fxarea; 388 int error; 389 390 fp = (struct linux_rt_sigframe *)(tf->tf_rsp - 8); 391 if ((error = copyin(fp, &frame, sizeof(frame))) != 0) { 392 mutex_enter(l->l_proc->p_lock); 393 sigexit(l, SIGILL); 394 return error; 395 } 396 luctx = &frame.uc; 397 lsigctx = &luctx->luc_mcontext; 398 399 memset(&uctx, 0, sizeof(uctx)); 400 mctx = (mcontext_t *)&uctx.uc_mcontext; 401 fxarea = (struct fxsave64 *)&mctx->__fpregs; 402 403 /* 404 * Set the flags. Linux always have CPU, stack and signal state, 405 * FPU is optional. uc_flags is not used to tell what we have. 406 */ 407 uctx.uc_flags = (_UC_SIGMASK|_UC_CPU|_UC_STACK|_UC_CLRSTACK); 408 if (lsigctx->fpstate != NULL) 409 uctx.uc_flags |= _UC_FPU; 410 uctx.uc_link = NULL; 411 412 /* 413 * Signal set 414 */ 415 linux_to_native_sigset(&uctx.uc_sigmask, &luctx->luc_sigmask); 416 417 /* 418 * CPU state 419 */ 420 mctx->__gregs[_REG_R8] = lsigctx->r8; 421 mctx->__gregs[_REG_R9] = lsigctx->r9; 422 mctx->__gregs[_REG_R10] = lsigctx->r10; 423 mctx->__gregs[_REG_R11] = lsigctx->r11; 424 mctx->__gregs[_REG_R12] = lsigctx->r12; 425 mctx->__gregs[_REG_R13] = lsigctx->r13; 426 mctx->__gregs[_REG_R14] = lsigctx->r14; 427 mctx->__gregs[_REG_R15] = lsigctx->r15; 428 mctx->__gregs[_REG_RDI] = lsigctx->rdi; 429 mctx->__gregs[_REG_RSI] = lsigctx->rsi; 430 mctx->__gregs[_REG_RBP] = lsigctx->rbp; 431 mctx->__gregs[_REG_RBX] = lsigctx->rbx; 432 mctx->__gregs[_REG_RAX] = lsigctx->rax; 433 mctx->__gregs[_REG_RDX] = lsigctx->rdx; 434 mctx->__gregs[_REG_RCX] = lsigctx->rcx; 435 mctx->__gregs[_REG_RIP] = lsigctx->rip; 436 mctx->__gregs[_REG_RFLAGS] = lsigctx->eflags; 437 mctx->__gregs[_REG_CS] = lsigctx->cs; 438 mctx->__gregs[_REG_GS] = lsigctx->gs; 439 mctx->__gregs[_REG_FS] = lsigctx->fs; 440 mctx->__gregs[_REG_ERR] = lsigctx->err; 441 mctx->__gregs[_REG_TRAPNO] = lsigctx->trapno; 442 mctx->__gregs[_REG_ES] = tf->tf_es; 443 mctx->__gregs[_REG_DS] = tf->tf_ds; 444 mctx->__gregs[_REG_RSP] = lsigctx->rsp; /* XXX */ 445 mctx->__gregs[_REG_SS] = tf->tf_ss; 446 447 /* 448 * FPU state 449 */ 450 if (lsigctx->fpstate != NULL) { 451 error = copyin(lsigctx->fpstate, &fpstate, sizeof(fpstate)); 452 if (error != 0) { 453 mutex_enter(l->l_proc->p_lock); 454 sigexit(l, SIGILL); 455 return error; 456 } 457 458 fxarea->fx_fcw = fpstate.cwd; 459 fxarea->fx_fsw = fpstate.swd; 460 fxarea->fx_ftw = fpstate.twd; 461 fxarea->fx_fop = fpstate.fop; 462 fxarea->fx_rip = fpstate.rip; 463 fxarea->fx_rdp = fpstate.rdp; 464 fxarea->fx_mxcsr = fpstate.mxcsr; 465 fxarea->fx_mxcsr_mask = fpstate.mxcsr_mask; 466 memcpy(&fxarea->fx_st, &fpstate.st_space, 467 sizeof(fxarea->fx_st)); 468 memcpy(&fxarea->fx_xmm, &fpstate.xmm_space, 469 sizeof(fxarea->fx_xmm)); 470 } 471 472 /* 473 * And the stack 474 */ 475 uctx.uc_stack.ss_flags = 0; 476 if (luctx->luc_stack.ss_flags & LINUX_SS_ONSTACK) 477 uctx.uc_stack.ss_flags |= SS_ONSTACK; 478 479 if (luctx->luc_stack.ss_flags & LINUX_SS_DISABLE) 480 uctx.uc_stack.ss_flags |= SS_DISABLE; 481 482 uctx.uc_stack.ss_sp = luctx->luc_stack.ss_sp; 483 uctx.uc_stack.ss_size = luctx->luc_stack.ss_size; 484 485 /* 486 * And let setucontext deal with that. 487 */ 488 mutex_enter(l->l_proc->p_lock); 489 error = setucontext(l, &uctx); 490 mutex_exit(l->l_proc->p_lock); 491 if (error) 492 return error; 493 494 return EJUSTRETURN; 495 } 496 497 int 498 linux_sys_arch_prctl(struct lwp *l, const struct linux_sys_arch_prctl_args *uap, register_t *retval) 499 { 500 /* { 501 syscallarg(int) code; 502 syscallarg(unsigned long) addr; 503 } */ 504 struct pcb *pcb = &l->l_addr->u_pcb; 505 struct trapframe *tf = l->l_md.md_regs; 506 int error; 507 uint64_t taddr; 508 509 switch(SCARG(uap, code)) { 510 case LINUX_ARCH_SET_GS: 511 taddr = SCARG(uap, addr); 512 if (taddr >= VM_MAXUSER_ADDRESS) 513 return EINVAL; 514 pcb->pcb_gs = taddr; 515 pcb->pcb_flags |= PCB_GS64; 516 if (l == curlwp) 517 wrmsr(MSR_KERNELGSBASE, taddr); 518 break; 519 520 case LINUX_ARCH_GET_GS: 521 if (pcb->pcb_flags & PCB_GS64) 522 taddr = pcb->pcb_gs; 523 else { 524 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr); 525 if (error != 0) 526 return error; 527 } 528 error = copyout(&taddr, (char *)SCARG(uap, addr), 8); 529 if (error != 0) 530 return error; 531 break; 532 533 case LINUX_ARCH_SET_FS: 534 taddr = SCARG(uap, addr); 535 if (taddr >= VM_MAXUSER_ADDRESS) 536 return EINVAL; 537 pcb->pcb_fs = taddr; 538 pcb->pcb_flags |= PCB_FS64; 539 if (l == curlwp) 540 wrmsr(MSR_FSBASE, taddr); 541 break; 542 543 case LINUX_ARCH_GET_FS: 544 if (pcb->pcb_flags & PCB_FS64) 545 taddr = pcb->pcb_fs; 546 else { 547 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr); 548 if (error != 0) 549 return error; 550 } 551 error = copyout(&taddr, (char *)SCARG(uap, addr), 8); 552 if (error != 0) 553 return error; 554 break; 555 556 default: 557 #ifdef DEBUG_LINUX 558 printf("linux_sys_arch_prctl: unexpected code %d\n", 559 SCARG(uap, code)); 560 #endif 561 return EINVAL; 562 } 563 564 return 0; 565 } 566 567 const int linux_vsyscall_to_syscall[] = { 568 LINUX_SYS_gettimeofday, 569 LINUX_SYS_time, 570 LINUX_SYS_nosys, /* nosys */ 571 LINUX_SYS_nosys, /* nosys */ 572 }; 573 574 int 575 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg) 576 { 577 struct trapframe *tf = arg; 578 uint64_t retaddr; 579 int vsyscallnr; 580 581 /* 582 * Check for a vsyscall. %rip must be the fault address, 583 * and the address must be in the Linux vsyscall area. 584 * Also, vsyscalls are only done at 1024-byte boundaries. 585 */ 586 587 if (__predict_true(trapaddr < LINUX_VSYSCALL_START)) 588 return 0; 589 590 if (trapaddr != tf->tf_rip) 591 return 0; 592 593 if ((tf->tf_rip & (LINUX_VSYSCALL_SIZE - 1)) != 0) 594 return 0; 595 596 vsyscallnr = (tf->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SIZE; 597 598 if (vsyscallnr > LINUX_VSYSCALL_MAXNR) 599 return 0; 600 601 /* 602 * Get the return address from the top of the stack, 603 * and fix up the return address. 604 * This assumes the faulting instruction was callq *reg, 605 * which is the only way that vsyscalls are ever entered. 606 */ 607 if (copyin((void *)tf->tf_rsp, &retaddr, sizeof retaddr) != 0) 608 return 0; 609 tf->tf_rip = retaddr; 610 tf->tf_rax = linux_vsyscall_to_syscall[vsyscallnr]; 611 tf->tf_rsp += 8; /* "pop" the return address */ 612 613 #if 0 614 printf("usertrap: rip %p rsp %p retaddr %p vsys %d sys %d\n", 615 (void *)tf->tf_rip, (void *)tf->tf_rsp, (void *)retaddr, 616 vsyscallnr, (int)tf->tf_rax); 617 #endif 618 619 (*l->l_proc->p_md.md_syscall)(tf); 620 621 return 1; 622 } 623 624 static void 625 linux_buildcontext(struct lwp *l, void *catcher, void *f) 626 { 627 struct trapframe *tf = l->l_md.md_regs; 628 629 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 630 tf->tf_rip = (u_int64_t)catcher; 631 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 632 tf->tf_rflags &= ~PSL_CLEARSIG; 633 tf->tf_rsp = (u_int64_t)f; 634 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 635 } 636 637 void * 638 linux_get_newtls(struct lwp *l) 639 { 640 struct trapframe *tf = l->l_md.md_regs; 641 642 return (void *)tf->tf_r8; 643 } 644 645 int 646 linux_set_newtls(struct lwp *l, void *tls) 647 { 648 struct linux_sys_arch_prctl_args cup; 649 register_t retval; 650 651 SCARG(&cup, code) = LINUX_ARCH_SET_FS; 652 SCARG(&cup, addr) = (unsigned long)tls; 653 654 return linux_sys_arch_prctl(l, &cup, &retval); 655 } 656