1 /* $NetBSD: linux_machdep.c,v 1.40 2009/11/23 00:46:07 rmind Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 2000, 2001 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden and Emmanuel Dreyfus. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.40 2009/11/23 00:46:07 rmind Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/signalvar.h> 38 #include <sys/kernel.h> 39 #include <sys/proc.h> 40 #include <sys/buf.h> 41 #include <sys/reboot.h> 42 #include <sys/conf.h> 43 #include <sys/exec.h> 44 #include <sys/file.h> 45 #include <sys/callout.h> 46 #include <sys/malloc.h> 47 #include <sys/mbuf.h> 48 #include <sys/msgbuf.h> 49 #include <sys/mount.h> 50 #include <sys/vnode.h> 51 #include <sys/device.h> 52 #include <sys/syscallargs.h> 53 #include <sys/filedesc.h> 54 #include <sys/exec_elf.h> 55 #include <sys/disklabel.h> 56 #include <sys/ioctl.h> 57 #include <miscfs/specfs/specdev.h> 58 59 #include <compat/linux/common/linux_types.h> 60 #include <compat/linux/common/linux_signal.h> 61 #include <compat/linux/common/linux_util.h> 62 #include <compat/linux/common/linux_ioctl.h> 63 #include <compat/linux/common/linux_hdio.h> 64 #include <compat/linux/common/linux_exec.h> 65 #include <compat/linux/common/linux_machdep.h> 66 67 #include <compat/linux/linux_syscallargs.h> 68 69 #include <sys/cpu.h> 70 #include <machine/fpu.h> 71 #include <machine/psl.h> 72 #include <machine/reg.h> 73 #include <machine/vmparam.h> 74 75 /* 76 * To see whether wscons is configured (for virtual console ioctl calls). 77 */ 78 #if defined(_KERNEL_OPT) 79 #include "wsdisplay.h" 80 #endif 81 #if (NWSDISPLAY > 0) 82 #include <dev/wscons/wsconsio.h> 83 #include <dev/wscons/wsdisplay_usl_io.h> 84 #endif 85 86 /* 87 * Set set up registers on exec. 88 * XXX not used at the moment since in sys/kern/exec_conf, LINUX_COMPAT 89 * entry uses NetBSD's native setregs instead of linux_setregs 90 */ 91 void 92 linux_setregs(struct lwp *l, struct exec_package *pack, u_long stack) 93 { 94 setregs(l, pack, stack); 95 } 96 97 /* 98 * Send an interrupt to process. 99 * 100 * Adapted from arch/powerpc/powerpc/sig_machdep.c:sendsig and 101 * compat/linux/arch/i386/linux_machdep.c:linux_sendsig 102 * 103 * XXX Does not work well yet with RT signals 104 * 105 */ 106 107 void 108 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 109 { 110 const int sig = ksi->ksi_signo; 111 struct lwp *l = curlwp; 112 struct proc *p = l->l_proc; 113 struct trapframe *tf; 114 sig_t catcher = SIGACTION(p, sig).sa_handler; 115 struct linux_sigregs frame; 116 struct linux_pt_regs linux_regs; 117 struct linux_sigcontext sc; 118 register_t fp; 119 int onstack, error; 120 int i; 121 122 tf = trapframe(l); 123 124 /* 125 * Do we need to jump onto the signal stack? 126 */ 127 onstack = 128 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 129 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 130 131 /* 132 * Signal stack is broken (see at the end of linux_sigreturn), so we do 133 * not use it yet. XXX fix this. 134 */ 135 onstack=0; 136 137 /* 138 * Allocate space for the signal handler context. 139 */ 140 if (onstack) { 141 fp = (register_t) 142 ((char *)l->l_sigstk.ss_sp + 143 l->l_sigstk.ss_size); 144 } else { 145 fp = tf->fixreg[1]; 146 } 147 #ifdef DEBUG_LINUX 148 printf("fp at start of linux_sendsig = %x\n", fp); 149 #endif 150 fp -= sizeof(struct linux_sigregs); 151 fp &= ~0xf; 152 153 /* 154 * Prepare a sigcontext for later. 155 */ 156 memset(&sc, 0, sizeof sc); 157 sc.lsignal = (int)native_to_linux_signo[sig]; 158 sc.lhandler = (unsigned long)catcher; 159 native_to_linux_old_extra_sigset(&sc.lmask, &sc._unused[3], mask); 160 sc.lregs = (struct linux_pt_regs*)fp; 161 162 /* 163 * Setup the signal stack frame as Linux does it in 164 * arch/ppc/kernel/signal.c:setup_frame() 165 * 166 * Save register context. 167 */ 168 for (i = 0; i < 32; i++) 169 linux_regs.lgpr[i] = tf->fixreg[i]; 170 linux_regs.lnip = tf->srr0; 171 linux_regs.lmsr = tf->srr1 & PSL_USERSRR1; 172 linux_regs.lorig_gpr3 = tf->fixreg[3]; /* XXX Is that right? */ 173 linux_regs.lctr = tf->ctr; 174 linux_regs.llink = tf->lr; 175 linux_regs.lxer = tf->xer; 176 linux_regs.lccr = tf->cr; 177 linux_regs.lmq = 0; /* Unused, 601 only */ 178 linux_regs.ltrap = tf->exc; 179 linux_regs.ldar = tf->dar; 180 linux_regs.ldsisr = tf->dsisr; 181 linux_regs.lresult = 0; 182 183 memset(&frame, 0, sizeof(frame)); 184 memcpy(&frame.lgp_regs, &linux_regs, sizeof(linux_regs)); 185 186 save_fpu_lwp(curlwp, FPU_SAVE); 187 memcpy(&frame.lfp_regs, curpcb->pcb_fpu.fpreg, sizeof(frame.lfp_regs)); 188 189 /* 190 * Copy Linux's signal trampoline on the user stack It should not 191 * be used, but Linux binaries might expect it to be there. 192 */ 193 frame.ltramp[0] = 0x38997777; /* li r0, 0x7777 */ 194 frame.ltramp[1] = 0x44000002; /* sc */ 195 196 /* 197 * Move it to the user stack 198 * There is a little trick here, about the LINUX_ABIGAP: the 199 * linux_sigreg structure has a 56 int gap to support rs6000/xcoff 200 * binaries. But the Linux kernel seems to do without it, and it 201 * just skip it when building the stack frame. Hence the LINUX_ABIGAP. 202 */ 203 sendsig_reset(l, sig); 204 mutex_exit(p->p_lock); 205 error = copyout(&frame, (void *)fp, sizeof (frame) - LINUX_ABIGAP); 206 207 if (error != 0) { 208 /* 209 * Process has trashed its stack; give it an illegal 210 * instruction to halt it in its tracks. 211 */ 212 mutex_enter(p->p_lock); 213 sigexit(l, SIGILL); 214 /* NOTREACHED */ 215 } 216 217 /* 218 * Add a sigcontext on the stack 219 */ 220 fp -= sizeof(struct linux_sigcontext); 221 error = copyout(&sc, (void *)fp, sizeof (struct linux_sigcontext)); 222 mutex_enter(p->p_lock); 223 224 if (error != 0) { 225 /* 226 * Process has trashed its stack; give it an illegal 227 * instruction to halt it in its tracks. 228 */ 229 sigexit(l, SIGILL); 230 /* NOTREACHED */ 231 } 232 233 /* 234 * Set the registers according to how the Linux process expects them. 235 * "Mind the gap" Linux expects a gap here. 236 */ 237 tf->fixreg[1] = fp - LINUX__SIGNAL_FRAMESIZE; 238 tf->lr = (int)catcher; 239 tf->fixreg[3] = (int)native_to_linux_signo[sig]; 240 tf->fixreg[4] = fp; 241 tf->srr0 = (int)p->p_sigctx.ps_sigcode; 242 243 #ifdef DEBUG_LINUX 244 printf("fp at end of linux_sendsig = %x\n", fp); 245 #endif 246 /* 247 * Remember that we're now on the signal stack. 248 */ 249 if (onstack) 250 l->l_sigstk.ss_flags |= SS_ONSTACK; 251 #ifdef DEBUG_LINUX 252 printf("linux_sendsig: exitting. fp=0x%lx\n",(long)fp); 253 #endif 254 } 255 256 /* 257 * System call to cleanup state after a signal 258 * has been taken. Reset signal mask and 259 * stack state from context left by sendsig (above). 260 * Return to previous pc and psl as specified by 261 * context left by sendsig. Check carefully to 262 * make sure that the user has not modified the 263 * psl to gain improper privileges or to cause 264 * a machine fault. 265 * 266 * XXX not tested 267 */ 268 int 269 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval) 270 { 271 /* { 272 syscallarg(struct linux_rt_sigframe *) sfp; 273 } */ 274 struct proc *p = l->l_proc; 275 struct linux_rt_sigframe *scp, sigframe; 276 struct linux_sigregs sregs; 277 struct linux_pt_regs *lregs; 278 struct trapframe *tf; 279 sigset_t mask; 280 int i; 281 282 /* 283 * The trampoline code hands us the context. 284 * It is unsafe to keep track of it ourselves, in the event that a 285 * program jumps out of a signal handler. 286 */ 287 scp = SCARG(uap, sfp); 288 289 /* 290 * Get the context from user stack 291 */ 292 if (copyin((void *)scp, &sigframe, sizeof(*scp))) 293 return (EFAULT); 294 295 /* 296 * Restore register context. 297 */ 298 if (copyin((void *)sigframe.luc.luc_context.lregs, 299 &sregs, sizeof(sregs))) 300 return (EFAULT); 301 lregs = (struct linux_pt_regs *)&sregs.lgp_regs; 302 303 tf = trapframe(l); 304 #ifdef DEBUG_LINUX 305 (unsigned long)tf, (unsigned long)scp); 306 #endif 307 308 if (!PSL_USEROK_P(lregs->lmsr)) 309 return (EINVAL); 310 311 for (i = 0; i < 32; i++) 312 tf->fixreg[i] = lregs->lgpr[i]; 313 tf->lr = lregs->llink; 314 tf->cr = lregs->lccr; 315 tf->xer = lregs->lxer; 316 tf->ctr = lregs->lctr; 317 tf->srr0 = lregs->lnip; 318 tf->srr1 = lregs->lmsr; 319 320 /* 321 * Make sure the fpu state is discarded 322 */ 323 save_fpu_lwp(curlwp, FPU_DISCARD); 324 325 memcpy(curpcb->pcb_fpu.fpreg, (void *)&sregs.lfp_regs, 326 sizeof(curpcb->pcb_fpu.fpreg)); 327 328 mutex_enter(p->p_lock); 329 330 /* 331 * Restore signal stack. 332 * 333 * XXX cannot find the onstack information in Linux sig context. 334 * Is signal stack really supported on Linux? 335 * 336 * It seems to be supported in libc6... 337 */ 338 /* if (sc.sc_onstack & SS_ONSTACK) 339 l->l_sigstk.ss_flags |= SS_ONSTACK; 340 else */ 341 l->l_sigstk.ss_flags &= ~SS_ONSTACK; 342 343 /* 344 * Grab the signal mask 345 */ 346 linux_to_native_sigset(&mask, &sigframe.luc.luc_sigmask); 347 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 348 349 mutex_exit(p->p_lock); 350 351 return (EJUSTRETURN); 352 } 353 354 355 /* 356 * The following needs code review for potential security issues 357 */ 358 int 359 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval) 360 { 361 /* { 362 syscallarg(struct linux_sigcontext *) scp; 363 } */ 364 struct proc *p = l->l_proc; 365 struct linux_sigcontext *scp, context; 366 struct linux_sigregs sregs; 367 struct linux_pt_regs *lregs; 368 struct trapframe *tf; 369 sigset_t mask; 370 int i; 371 372 /* 373 * The trampoline code hands us the context. 374 * It is unsafe to keep track of it ourselves, in the event that a 375 * program jumps out of a signal handler. 376 */ 377 scp = SCARG(uap, scp); 378 379 /* 380 * Get the context from user stack 381 */ 382 if (copyin(scp, &context, sizeof(*scp))) 383 return (EFAULT); 384 385 /* 386 * Restore register context. 387 */ 388 if (copyin((void *)context.lregs, &sregs, sizeof(sregs))) 389 return (EFAULT); 390 lregs = (struct linux_pt_regs *)&sregs.lgp_regs; 391 392 tf = trapframe(l); 393 #ifdef DEBUG_LINUX 394 printf("linux_sys_sigreturn: trapframe=0x%lx scp=0x%lx\n", 395 (unsigned long)tf, (unsigned long)scp); 396 #endif 397 398 if (!PSL_USEROK_P(lregs->lmsr)) 399 return (EINVAL); 400 401 for (i = 0; i < 32; i++) 402 tf->fixreg[i] = lregs->lgpr[i]; 403 tf->lr = lregs->llink; 404 tf->cr = lregs->lccr; 405 tf->xer = lregs->lxer; 406 tf->ctr = lregs->lctr; 407 tf->srr0 = lregs->lnip; 408 tf->srr1 = lregs->lmsr; 409 410 /* 411 * Make sure the fpu state is discarded 412 */ 413 save_fpu_lwp(curlwp, FPU_DISCARD); 414 415 memcpy(curpcb->pcb_fpu.fpreg, (void *)&sregs.lfp_regs, 416 sizeof(curpcb->pcb_fpu.fpreg)); 417 418 mutex_enter(p->p_lock); 419 420 /* 421 * Restore signal stack. 422 * 423 * XXX cannot find the onstack information in Linux sig context. 424 * Is signal stack really supported on Linux? 425 */ 426 #if 0 427 if (sc.sc_onstack & SS_ONSTACK) 428 l->l_sigstk.ss_flags |= SS_ONSTACK; 429 else 430 #endif 431 l->l_sigstk.ss_flags &= ~SS_ONSTACK; 432 433 /* Restore signal mask. */ 434 linux_old_extra_to_native_sigset(&mask, &context.lmask, 435 &context._unused[3]); 436 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 437 438 mutex_exit(p->p_lock); 439 440 return (EJUSTRETURN); 441 } 442 443 444 #if 0 445 int 446 linux_sys_modify_ldt(struct proc *p, void *v, register_t *retval) 447 { 448 /* 449 * This syscall is not implemented in Linux/PowerPC: we should not 450 * be here 451 */ 452 #ifdef DEBUG_LINUX 453 printf("linux_sys_modify_ldt: should not be here.\n"); 454 #endif 455 return 0; 456 } 457 #endif 458 459 /* 460 * major device numbers remapping 461 */ 462 dev_t 463 linux_fakedev(dev_t dev, int raw) 464 { 465 /* XXX write me */ 466 return dev; 467 } 468 469 /* 470 * We come here in a last attempt to satisfy a Linux ioctl() call 471 */ 472 int 473 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval) 474 { 475 /* { 476 syscallarg(int) fd; 477 syscallarg(u_long) com; 478 syscallarg(void *) data; 479 } */ 480 struct sys_ioctl_args bia; 481 u_long com; 482 483 SCARG(&bia, fd) = SCARG(uap, fd); 484 SCARG(&bia, data) = SCARG(uap, data); 485 com = SCARG(uap, com); 486 487 switch (com) { 488 default: 489 printf("linux_machdepioctl: invalid ioctl %08lx\n", com); 490 return EINVAL; 491 } 492 SCARG(&bia, com) = com; 493 /* XXX NJWLWP */ 494 return sys_ioctl(curlwp, &bia, retval); 495 } 496 #if 0 497 /* 498 * Set I/O permissions for a process. Just set the maximum level 499 * right away (ignoring the argument), otherwise we would have 500 * to rely on I/O permission maps, which are not implemented. 501 */ 502 int 503 linux_sys_iopl(struct lwp *l, const void *v, register_t *retval) 504 { 505 /* 506 * This syscall is not implemented in Linux/PowerPC: we should not be here 507 */ 508 #ifdef DEBUG_LINUX 509 printf("linux_sys_iopl: should not be here.\n"); 510 #endif 511 return 0; 512 } 513 #endif 514 515 /* 516 * See above. If a root process tries to set access to an I/O port, 517 * just let it have the whole range. 518 */ 519 int 520 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval) 521 { 522 /* 523 * This syscall is not implemented in Linux/PowerPC: we should not be here 524 */ 525 #ifdef DEBUG_LINUX 526 printf("linux_sys_ioperm: should not be here.\n"); 527 #endif 528 return 0; 529 } 530 531 /* 532 * wrapper linux_sys_new_uname() -> linux_sys_uname() 533 */ 534 int 535 linux_sys_new_uname(struct lwp *l, const struct linux_sys_new_uname_args *uap, register_t *retval) 536 { 537 return linux_sys_uname(l, (const void *)uap, retval); 538 } 539 540 /* 541 * wrapper linux_sys_new_select() -> linux_sys_select() 542 */ 543 int 544 linux_sys_new_select(struct lwp *l, const struct linux_sys_new_select_args *uap, register_t *retval) 545 { 546 return linux_sys_select(l, (const void *)uap, retval); 547 } 548 549 int 550 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg) 551 { 552 return 0; 553 } 554