1 /* $NetBSD: linux_machdep.c,v 1.26 2007/12/08 18:36:05 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by Emmanuel Dreyfus 17 * 4. The name of the author may not be used to endorse or promote 18 * products derived from this software without specific prior written 19 * permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <sys/cdefs.h> 35 36 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.26 2007/12/08 18:36:05 dsl Exp $"); 37 38 #include <sys/param.h> 39 #include <sys/types.h> 40 #include <sys/systm.h> 41 #include <sys/signal.h> 42 #include <sys/exec.h> 43 #include <sys/proc.h> 44 #include <sys/ptrace.h> /* for process_read_fpregs() */ 45 #include <sys/user.h> 46 #include <sys/wait.h> 47 #include <sys/ucontext.h> 48 #include <sys/conf.h> 49 50 #include <machine/reg.h> 51 #include <machine/pcb.h> 52 #include <machine/fpu.h> 53 #include <machine/mcontext.h> 54 #include <machine/specialreg.h> 55 #include <machine/vmparam.h> 56 57 /* 58 * To see whether wscons is configured (for virtual console ioctl calls). 59 */ 60 #if defined(_KERNEL_OPT) 61 #include "wsdisplay.h" 62 #endif 63 #if (NWSDISPLAY > 0) 64 #include <dev/wscons/wsconsio.h> 65 #include <dev/wscons/wsdisplay_usl_io.h> 66 #endif 67 68 69 #include <compat/linux/common/linux_signal.h> 70 #include <compat/linux/common/linux_errno.h> 71 #include <compat/linux/common/linux_exec.h> 72 #include <compat/linux/common/linux_ioctl.h> 73 #include <compat/linux/common/linux_prctl.h> 74 #include <compat/linux/common/linux_machdep.h> 75 #include <compat/linux/common/linux_ipc.h> 76 #include <compat/linux/common/linux_sem.h> 77 #include <compat/linux/linux_syscall.h> 78 #include <compat/linux/linux_syscallargs.h> 79 80 static void linux_buildcontext(struct lwp *, void *, void *); 81 82 void 83 linux_setregs(l, epp, stack) 84 struct lwp *l; 85 struct exec_package *epp; 86 u_long stack; 87 { 88 struct pcb *pcb = &l->l_addr->u_pcb; 89 struct trapframe *tf; 90 91 /* If we were using the FPU, forget about it. */ 92 if (l->l_addr->u_pcb.pcb_fpcpu != NULL) 93 fpusave_lwp(l, 0); 94 95 l->l_md.md_flags &= ~MDP_USEDFPU; 96 pcb->pcb_flags = 0; 97 pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__; 98 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__; 99 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__; 100 pcb->pcb_fs = 0; 101 pcb->pcb_gs = 0; 102 103 l->l_proc->p_flag &= ~PK_32; 104 105 tf = l->l_md.md_regs; 106 tf->tf_rax = 0; 107 tf->tf_rbx = 0; 108 tf->tf_rcx = epp->ep_entry; 109 tf->tf_rdx = 0; 110 tf->tf_rsi = 0; 111 tf->tf_rdi = 0; 112 tf->tf_rbp = 0; 113 tf->tf_rsp = stack; 114 tf->tf_r8 = 0; 115 tf->tf_r9 = 0; 116 tf->tf_r10 = 0; 117 tf->tf_r11 = 0; 118 tf->tf_r12 = 0; 119 tf->tf_r13 = 0; 120 tf->tf_r14 = 0; 121 tf->tf_r15 = 0; 122 tf->tf_rip = epp->ep_entry; 123 tf->tf_rflags = PSL_USERSET; 124 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 125 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 126 tf->tf_ds = 0; 127 tf->tf_es = 0; 128 tf->tf_fs = 0; 129 tf->tf_gs = 0; 130 131 return; 132 } 133 134 void 135 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 136 { 137 struct lwp *l = curlwp; 138 struct proc *p = l->l_proc; 139 struct sigacts *ps = p->p_sigacts; 140 int onstack, error; 141 int sig = ksi->ksi_signo; 142 struct linux_rt_sigframe *sfp, sigframe; 143 struct linux__fpstate *fpsp, fpstate; 144 struct fpreg fpregs; 145 struct trapframe *tf = l->l_md.md_regs; 146 sig_t catcher = SIGACTION(p, sig).sa_handler; 147 linux_sigset_t lmask; 148 char *sp; 149 150 /* Do we need to jump onto the signal stack? */ 151 onstack = 152 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 153 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 154 155 /* Allocate space for the signal handler context. */ 156 if (onstack) 157 sp = ((char *)l->l_sigstk.ss_sp + 158 l->l_sigstk.ss_size); 159 else 160 sp = (char *)tf->tf_rsp - 128; 161 162 /* 163 * Save FPU state, if any 164 */ 165 if (l->l_md.md_flags & MDP_USEDFPU) { 166 sp = (char *) 167 (((long)sp - sizeof(struct linux__fpstate)) & ~0xfUL); 168 fpsp = (struct linux__fpstate *)sp; 169 } else 170 fpsp = NULL; 171 172 /* 173 * Populate the rt_sigframe 174 */ 175 sp = (char *) 176 ((((long)sp - sizeof(struct linux_rt_sigframe)) & ~0xfUL) - 8); 177 sfp = (struct linux_rt_sigframe *)sp; 178 179 bzero(&sigframe, sizeof(sigframe)); 180 if (ps->sa_sigdesc[sig].sd_vers != 0) 181 sigframe.pretcode = 182 (char *)(u_long)ps->sa_sigdesc[sig].sd_tramp; 183 else 184 sigframe.pretcode = NULL; 185 186 /* 187 * The user context 188 */ 189 sigframe.uc.luc_flags = 0; 190 sigframe.uc.luc_link = NULL; 191 192 /* This is used regardless of SA_ONSTACK in Linux */ 193 sigframe.uc.luc_stack.ss_sp = l->l_sigstk.ss_sp; 194 sigframe.uc.luc_stack.ss_size = l->l_sigstk.ss_size; 195 sigframe.uc.luc_stack.ss_flags = 0; 196 if (l->l_sigstk.ss_flags & SS_ONSTACK) 197 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_ONSTACK; 198 if (l->l_sigstk.ss_flags & SS_DISABLE) 199 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_DISABLE; 200 201 sigframe.uc.luc_mcontext.r8 = tf->tf_r8; 202 sigframe.uc.luc_mcontext.r9 = tf->tf_r9; 203 sigframe.uc.luc_mcontext.r10 = tf->tf_r10; 204 sigframe.uc.luc_mcontext.r11 = tf->tf_r11; 205 sigframe.uc.luc_mcontext.r12 = tf->tf_r12; 206 sigframe.uc.luc_mcontext.r13 = tf->tf_r13; 207 sigframe.uc.luc_mcontext.r14 = tf->tf_r14; 208 sigframe.uc.luc_mcontext.r15 = tf->tf_r15; 209 sigframe.uc.luc_mcontext.rdi = tf->tf_rdi; 210 sigframe.uc.luc_mcontext.rsi = tf->tf_rsi; 211 sigframe.uc.luc_mcontext.rbp = tf->tf_rbp; 212 sigframe.uc.luc_mcontext.rbx = tf->tf_rbx; 213 sigframe.uc.luc_mcontext.rdx = tf->tf_rdx; 214 sigframe.uc.luc_mcontext.rax = tf->tf_rax; 215 sigframe.uc.luc_mcontext.rcx = tf->tf_rcx; 216 sigframe.uc.luc_mcontext.rsp = tf->tf_rsp; 217 sigframe.uc.luc_mcontext.rip = tf->tf_rip; 218 sigframe.uc.luc_mcontext.eflags = tf->tf_rflags; 219 sigframe.uc.luc_mcontext.cs = tf->tf_cs; 220 sigframe.uc.luc_mcontext.gs = tf->tf_gs; 221 sigframe.uc.luc_mcontext.fs = tf->tf_fs; 222 sigframe.uc.luc_mcontext.err = tf->tf_err; 223 sigframe.uc.luc_mcontext.trapno = tf->tf_trapno; 224 native_to_linux_sigset(&lmask, mask); 225 sigframe.uc.luc_mcontext.oldmask = lmask.sig[0]; 226 sigframe.uc.luc_mcontext.cr2 = (long)l->l_addr->u_pcb.pcb_onfault; 227 sigframe.uc.luc_mcontext.fpstate = fpsp; 228 native_to_linux_sigset(&sigframe.uc.luc_sigmask, mask); 229 230 /* 231 * the siginfo structure 232 */ 233 sigframe.info.lsi_signo = native_to_linux_signo[sig]; 234 sigframe.info.lsi_errno = native_to_linux_errno[ksi->ksi_errno]; 235 sigframe.info.lsi_code = ksi->ksi_code; 236 237 /* XXX This is a rought conversion, taken from i386 code */ 238 switch (sigframe.info.lsi_signo) { 239 case LINUX_SIGILL: 240 case LINUX_SIGFPE: 241 case LINUX_SIGSEGV: 242 case LINUX_SIGBUS: 243 case LINUX_SIGTRAP: 244 sigframe.info._sifields._sigfault._addr = ksi->ksi_addr; 245 break; 246 case LINUX_SIGCHLD: 247 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid; 248 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid; 249 sigframe.info._sifields._sigchld._utime = ksi->ksi_utime; 250 sigframe.info._sifields._sigchld._stime = ksi->ksi_stime; 251 252 if (WCOREDUMP(ksi->ksi_status)) { 253 sigframe.info.lsi_code = LINUX_CLD_DUMPED; 254 sigframe.info._sifields._sigchld._status = 255 _WSTATUS(ksi->ksi_status); 256 } else if (_WSTATUS(ksi->ksi_status)) { 257 sigframe.info.lsi_code = LINUX_CLD_KILLED; 258 sigframe.info._sifields._sigchld._status = 259 _WSTATUS(ksi->ksi_status); 260 } else { 261 sigframe.info.lsi_code = LINUX_CLD_EXITED; 262 sigframe.info._sifields._sigchld._status = 263 ((ksi->ksi_status & 0xff00U) >> 8); 264 } 265 break; 266 case LINUX_SIGIO: 267 sigframe.info._sifields._sigpoll._band = ksi->ksi_band; 268 sigframe.info._sifields._sigpoll._fd = ksi->ksi_fd; 269 break; 270 default: 271 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid; 272 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid; 273 if ((sigframe.info.lsi_signo == LINUX_SIGALRM) || 274 (sigframe.info.lsi_signo >= LINUX_SIGRTMIN)) 275 sigframe.info._sifields._timer._sigval.sival_ptr = 276 ksi->ksi_value.sival_ptr; 277 break; 278 } 279 280 sendsig_reset(l, sig); 281 mutex_exit(&p->p_smutex); 282 error = 0; 283 284 /* 285 * Save FPU state, if any 286 */ 287 if (fpsp != NULL) { 288 (void)process_read_fpregs(l, &fpregs); 289 bzero(&fpstate, sizeof(fpstate)); 290 fpstate.cwd = fpregs.fp_fcw; 291 fpstate.swd = fpregs.fp_fsw; 292 fpstate.twd = fpregs.fp_ftw; 293 fpstate.fop = fpregs.fp_fop; 294 fpstate.rip = fpregs.fp_rip; 295 fpstate.rdp = fpregs.fp_rdp; 296 fpstate.mxcsr = fpregs.fp_mxcsr; 297 fpstate.mxcsr_mask = fpregs.fp_mxcsr_mask; 298 memcpy(&fpstate.st_space, &fpregs.fp_st, 299 sizeof(fpstate.st_space)); 300 memcpy(&fpstate.xmm_space, &fpregs.fp_xmm, 301 sizeof(fpstate.xmm_space)); 302 error = copyout(&fpstate, fpsp, sizeof(fpstate)); 303 } 304 305 if (error == 0) 306 error = copyout(&sigframe, sp, sizeof(sigframe)); 307 308 mutex_enter(&p->p_smutex); 309 310 if (error != 0) { 311 sigexit(l, SIGILL); 312 return; 313 } 314 315 linux_buildcontext(l, catcher, sp); 316 tf->tf_rdi = sigframe.info.lsi_signo; 317 tf->tf_rax = 0; 318 tf->tf_rsi = (long)&sfp->info; 319 tf->tf_rdx = (long)&sfp->uc; 320 321 /* 322 * Remember we use signal stack 323 */ 324 if (onstack) 325 l->l_sigstk.ss_flags |= SS_ONSTACK; 326 return; 327 } 328 329 int 330 linux_sys_modify_ldt(l, v, retval) 331 struct lwp *l; 332 void *v; 333 register_t *retval; 334 { 335 printf("linux_sys_modify_ldt\n"); 336 return 0; 337 } 338 339 int 340 linux_sys_iopl(l, v, retval) 341 struct lwp *l; 342 void *v; 343 register_t *retval; 344 { 345 return 0; 346 } 347 348 int 349 linux_sys_ioperm(l, v, retval) 350 struct lwp *l; 351 void *v; 352 register_t *retval; 353 { 354 return 0; 355 } 356 357 dev_t 358 linux_fakedev(dev_t dev, int raw) 359 { 360 361 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 362 const struct cdevsw *cd = cdevsw_lookup(dev); 363 364 if (raw) { 365 #if (NWSDISPLAY > 0) 366 extern const struct cdevsw wsdisplay_cdevsw; 367 if (cd == &wsdisplay_cdevsw) 368 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 369 #endif 370 } 371 372 if (cd == &ptc_cdevsw) 373 return makedev(LINUX_PTC_MAJOR, minor(dev)); 374 if (cd == &pts_cdevsw) 375 return makedev(LINUX_PTS_MAJOR, minor(dev)); 376 377 return ((minor(dev) & 0xff) | ((major(dev) & 0xfff) << 8) 378 | (((unsigned long long int) (minor(dev) & ~0xff)) << 12) 379 | (((unsigned long long int) (major(dev) & ~0xfff)) << 32)); 380 } 381 382 int 383 linux_machdepioctl(l, v, retval) 384 struct lwp *l; 385 void *v; 386 register_t *retval; 387 { 388 return 0; 389 } 390 391 int 392 linux_sys_rt_sigreturn(l, v, retval) 393 struct lwp *l; 394 void *v; 395 register_t *retval; 396 { 397 struct linux_ucontext *luctx; 398 struct trapframe *tf = l->l_md.md_regs; 399 struct linux_sigcontext *lsigctx; 400 struct linux__fpstate fpstate; 401 struct linux_rt_sigframe frame, *fp; 402 ucontext_t uctx; 403 mcontext_t *mctx; 404 struct fxsave64 *fxarea; 405 int error; 406 407 fp = (struct linux_rt_sigframe *)(tf->tf_rsp - 8); 408 if ((error = copyin(fp, &frame, sizeof(frame))) != 0) { 409 mutex_enter(&l->l_proc->p_smutex); 410 sigexit(l, SIGILL); 411 return error; 412 } 413 luctx = &frame.uc; 414 lsigctx = &luctx->luc_mcontext; 415 416 bzero(&uctx, sizeof(uctx)); 417 mctx = (mcontext_t *)&uctx.uc_mcontext; 418 fxarea = (struct fxsave64 *)&mctx->__fpregs; 419 420 /* 421 * Set the flags. Linux always have CPU, stack and signal state, 422 * FPU is optional. uc_flags is not used to tell what we have. 423 */ 424 uctx.uc_flags = (_UC_SIGMASK|_UC_CPU|_UC_STACK|_UC_CLRSTACK); 425 if (lsigctx->fpstate != NULL) 426 uctx.uc_flags |= _UC_FPU; 427 uctx.uc_link = NULL; 428 429 /* 430 * Signal set 431 */ 432 linux_to_native_sigset(&uctx.uc_sigmask, &luctx->luc_sigmask); 433 434 /* 435 * CPU state 436 */ 437 mctx->__gregs[_REG_R8] = lsigctx->r8; 438 mctx->__gregs[_REG_R9] = lsigctx->r9; 439 mctx->__gregs[_REG_R10] = lsigctx->r10; 440 mctx->__gregs[_REG_R11] = lsigctx->r11; 441 mctx->__gregs[_REG_R12] = lsigctx->r12; 442 mctx->__gregs[_REG_R13] = lsigctx->r13; 443 mctx->__gregs[_REG_R14] = lsigctx->r14; 444 mctx->__gregs[_REG_R15] = lsigctx->r15; 445 mctx->__gregs[_REG_RDI] = lsigctx->rdi; 446 mctx->__gregs[_REG_RSI] = lsigctx->rsi; 447 mctx->__gregs[_REG_RBP] = lsigctx->rbp; 448 mctx->__gregs[_REG_RBX] = lsigctx->rbx; 449 mctx->__gregs[_REG_RAX] = lsigctx->rax; 450 mctx->__gregs[_REG_RDX] = lsigctx->rdx; 451 mctx->__gregs[_REG_RCX] = lsigctx->rcx; 452 mctx->__gregs[_REG_RIP] = lsigctx->rip; 453 mctx->__gregs[_REG_RFL] = lsigctx->eflags; 454 mctx->__gregs[_REG_CS] = lsigctx->cs; 455 mctx->__gregs[_REG_GS] = lsigctx->gs; 456 mctx->__gregs[_REG_FS] = lsigctx->fs; 457 mctx->__gregs[_REG_ERR] = lsigctx->err; 458 mctx->__gregs[_REG_TRAPNO] = lsigctx->trapno; 459 mctx->__gregs[_REG_ES] = tf->tf_es; 460 mctx->__gregs[_REG_DS] = tf->tf_ds; 461 mctx->__gregs[_REG_URSP] = lsigctx->rsp; /* XXX */ 462 mctx->__gregs[_REG_SS] = tf->tf_ss; 463 464 /* 465 * FPU state 466 */ 467 if (lsigctx->fpstate != NULL) { 468 error = copyin(lsigctx->fpstate, &fpstate, sizeof(fpstate)); 469 if (error != 0) { 470 mutex_enter(&l->l_proc->p_smutex); 471 sigexit(l, SIGILL); 472 return error; 473 } 474 475 fxarea->fx_fcw = fpstate.cwd; 476 fxarea->fx_fsw = fpstate.swd; 477 fxarea->fx_ftw = fpstate.twd; 478 fxarea->fx_fop = fpstate.fop; 479 fxarea->fx_rip = fpstate.rip; 480 fxarea->fx_rdp = fpstate.rdp; 481 fxarea->fx_mxcsr = fpstate.mxcsr; 482 fxarea->fx_mxcsr_mask = fpstate.mxcsr_mask; 483 memcpy(&fxarea->fx_st, &fpstate.st_space, 484 sizeof(fxarea->fx_st)); 485 memcpy(&fxarea->fx_xmm, &fpstate.xmm_space, 486 sizeof(fxarea->fx_xmm)); 487 } 488 489 /* 490 * And the stack 491 */ 492 uctx.uc_stack.ss_flags = 0; 493 if (luctx->luc_stack.ss_flags & LINUX_SS_ONSTACK); 494 uctx.uc_stack.ss_flags = SS_ONSTACK; 495 496 if (luctx->luc_stack.ss_flags & LINUX_SS_DISABLE); 497 uctx.uc_stack.ss_flags = SS_DISABLE; 498 499 uctx.uc_stack.ss_sp = luctx->luc_stack.ss_sp; 500 uctx.uc_stack.ss_size = luctx->luc_stack.ss_size; 501 502 /* 503 * And let setucontext deal with that. 504 */ 505 mutex_enter(&l->l_proc->p_smutex); 506 error = setucontext(l, &uctx); 507 mutex_exit(&l->l_proc->p_smutex); 508 if (error) 509 return error; 510 511 return EJUSTRETURN; 512 } 513 514 int 515 linux_sys_arch_prctl(struct lwp *l, void *v, register_t *retval) 516 { 517 struct linux_sys_arch_prctl_args /* { 518 syscallarg(int) code; 519 syscallarg(unsigned long) addr; 520 } */ *uap = v; 521 struct pcb *pcb = &l->l_addr->u_pcb; 522 struct trapframe *tf = l->l_md.md_regs; 523 int error; 524 uint64_t taddr; 525 526 switch(SCARG(uap, code)) { 527 case LINUX_ARCH_SET_GS: 528 taddr = SCARG(uap, addr); 529 if (taddr >= VM_MAXUSER_ADDRESS) 530 return EINVAL; 531 pcb->pcb_gs = taddr; 532 pcb->pcb_flags |= PCB_GS64; 533 if (l == curlwp) 534 wrmsr(MSR_KERNELGSBASE, taddr); 535 break; 536 537 case LINUX_ARCH_GET_GS: 538 if (pcb->pcb_flags & PCB_GS64) 539 taddr = pcb->pcb_gs; 540 else { 541 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr); 542 if (error != 0) 543 return error; 544 } 545 error = copyout(&taddr, (char *)SCARG(uap, addr), 8); 546 if (error != 0) 547 return error; 548 break; 549 550 case LINUX_ARCH_SET_FS: 551 taddr = SCARG(uap, addr); 552 if (taddr >= VM_MAXUSER_ADDRESS) 553 return EINVAL; 554 pcb->pcb_fs = taddr; 555 pcb->pcb_flags |= PCB_FS64; 556 if (l == curlwp) 557 wrmsr(MSR_FSBASE, taddr); 558 break; 559 560 case LINUX_ARCH_GET_FS: 561 if (pcb->pcb_flags & PCB_FS64) 562 taddr = pcb->pcb_fs; 563 else { 564 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr); 565 if (error != 0) 566 return error; 567 } 568 error = copyout(&taddr, (char *)SCARG(uap, addr), 8); 569 if (error != 0) 570 return error; 571 break; 572 573 default: 574 #ifdef DEBUG_LINUX 575 printf("linux_sys_arch_prctl: unexpected code %d\n", 576 SCARG(uap, code)); 577 #endif 578 return EINVAL; 579 } 580 581 return 0; 582 } 583 584 const int linux_vsyscall_to_syscall[] = { 585 LINUX_SYS_gettimeofday, 586 LINUX_SYS_time, 587 LINUX_SYS_nosys, 588 LINUX_SYS_nosys, 589 }; 590 591 int 592 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg) 593 { 594 struct trapframe *tf = arg; 595 uint64_t retaddr; 596 int vsyscallnr; 597 598 /* 599 * Check for a vsyscall. %rip must be the fault address, 600 * and the address must be in the Linux vsyscall area. 601 * Also, vsyscalls are only done at 1024-byte boundaries. 602 */ 603 604 if (__predict_true(trapaddr < LINUX_VSYSCALL_START)) 605 return 0; 606 607 if (trapaddr != tf->tf_rip) 608 return 0; 609 610 if ((tf->tf_rip & (LINUX_VSYSCALL_SIZE - 1)) != 0) 611 return 0; 612 613 vsyscallnr = (tf->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SIZE; 614 615 if (vsyscallnr > LINUX_VSYSCALL_MAXNR) 616 return 0; 617 618 /* 619 * Get the return address from the top of the stack, 620 * and fix up the return address. 621 * This assumes the faulting instruction was callq *reg, 622 * which is the only way that vsyscalls are ever entered. 623 */ 624 if (copyin((void *)tf->tf_rsp, &retaddr, sizeof retaddr) != 0) 625 return 0; 626 tf->tf_rip = retaddr; 627 tf->tf_rax = linux_vsyscall_to_syscall[vsyscallnr]; 628 tf->tf_rsp += 8; /* "pop" the return address */ 629 630 #if 0 631 printf("usertrap: rip %p rsp %p retaddr %p vsys %d sys %d\n", 632 (void *)tf->tf_rip, (void *)tf->tf_rsp, (void *)retaddr, 633 vsyscallnr, (int)tf->tf_rax); 634 #endif 635 636 (*l->l_proc->p_md.md_syscall)(tf); 637 638 return 1; 639 } 640 641 static void 642 linux_buildcontext(struct lwp *l, void *catcher, void *f) 643 { 644 struct trapframe *tf = l->l_md.md_regs; 645 646 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 647 tf->tf_rip = (u_int64_t)catcher; 648 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 649 tf->tf_rflags &= ~(PSL_T|PSL_VM|PSL_AC); 650 tf->tf_rsp = (u_int64_t)f; 651 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 652 } 653 654 void * 655 linux_get_newtls(struct lwp *l) 656 { 657 struct trapframe *tf = l->l_md.md_regs; 658 659 return (void *)tf->tf_r8; 660 } 661 662 int 663 linux_set_newtls(struct lwp *l, void *tls) 664 { 665 struct linux_sys_arch_prctl_args cup; 666 register_t retval; 667 668 SCARG(&cup, code) = LINUX_ARCH_SET_FS; 669 SCARG(&cup, addr) = (unsigned long)tls; 670 671 return linux_sys_arch_prctl(l, &cup, &retval); 672 } 673