/* $NetBSD: linux_machdep.c,v 1.39 2008/04/28 20:23:43 martin Exp $ */

/*-
 * Copyright (c) 1995, 2000, 2001 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Frank van der Linden and Emmanuel Dreyfus.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.39 2008/04/28 20:23:43 martin Exp $"); 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/signalvar.h> 38 #include <sys/kernel.h> 39 #include <sys/proc.h> 40 #include <sys/user.h> 41 #include <sys/buf.h> 42 #include <sys/reboot.h> 43 #include <sys/conf.h> 44 #include <sys/exec.h> 45 #include <sys/file.h> 46 #include <sys/callout.h> 47 #include <sys/malloc.h> 48 #include <sys/mbuf.h> 49 #include <sys/msgbuf.h> 50 #include <sys/mount.h> 51 #include <sys/vnode.h> 52 #include <sys/device.h> 53 #include <sys/syscallargs.h> 54 #include <sys/filedesc.h> 55 #include <sys/exec_elf.h> 56 #include <sys/disklabel.h> 57 #include <sys/ioctl.h> 58 #include <miscfs/specfs/specdev.h> 59 60 #include <compat/linux/common/linux_types.h> 61 #include <compat/linux/common/linux_signal.h> 62 #include <compat/linux/common/linux_util.h> 63 #include <compat/linux/common/linux_ioctl.h> 64 #include <compat/linux/common/linux_hdio.h> 65 #include <compat/linux/common/linux_exec.h> 66 #include <compat/linux/common/linux_machdep.h> 67 68 #include <compat/linux/linux_syscallargs.h> 69 70 #include <sys/cpu.h> 71 #include <machine/fpu.h> 72 #include <machine/psl.h> 73 #include <machine/reg.h> 74 #include <machine/vmparam.h> 75 76 /* 77 * To see whether wscons is configured (for virtual console ioctl calls). 78 */ 79 #if defined(_KERNEL_OPT) 80 #include "wsdisplay.h" 81 #endif 82 #if (NWSDISPLAY > 0) 83 #include <dev/wscons/wsconsio.h> 84 #include <dev/wscons/wsdisplay_usl_io.h> 85 #endif 86 87 /* 88 * Set set up registers on exec. 89 * XXX not used at the moment since in sys/kern/exec_conf, LINUX_COMPAT 90 * entry uses NetBSD's native setregs instead of linux_setregs 91 */ 92 void 93 linux_setregs(struct lwp *l, struct exec_package *pack, u_long stack) 94 { 95 setregs(l, pack, stack); 96 } 97 98 /* 99 * Send an interrupt to process. 
100 * 101 * Adapted from arch/powerpc/powerpc/sig_machdep.c:sendsig and 102 * compat/linux/arch/i386/linux_machdep.c:linux_sendsig 103 * 104 * XXX Does not work well yet with RT signals 105 * 106 */ 107 108 void 109 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 110 { 111 const int sig = ksi->ksi_signo; 112 struct lwp *l = curlwp; 113 struct proc *p = l->l_proc; 114 struct trapframe *tf; 115 sig_t catcher = SIGACTION(p, sig).sa_handler; 116 struct linux_sigregs frame; 117 struct linux_pt_regs linux_regs; 118 struct linux_sigcontext sc; 119 register_t fp; 120 int onstack, error; 121 int i; 122 123 tf = trapframe(l); 124 125 /* 126 * Do we need to jump onto the signal stack? 127 */ 128 onstack = 129 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 130 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 131 132 /* 133 * Signal stack is broken (see at the end of linux_sigreturn), so we do 134 * not use it yet. XXX fix this. 135 */ 136 onstack=0; 137 138 /* 139 * Allocate space for the signal handler context. 140 */ 141 if (onstack) { 142 fp = (register_t) 143 ((char *)l->l_sigstk.ss_sp + 144 l->l_sigstk.ss_size); 145 } else { 146 fp = tf->fixreg[1]; 147 } 148 #ifdef DEBUG_LINUX 149 printf("fp at start of linux_sendsig = %x\n", fp); 150 #endif 151 fp -= sizeof(struct linux_sigregs); 152 fp &= ~0xf; 153 154 /* 155 * Prepare a sigcontext for later. 156 */ 157 memset(&sc, 0, sizeof sc); 158 sc.lsignal = (int)native_to_linux_signo[sig]; 159 sc.lhandler = (unsigned long)catcher; 160 native_to_linux_old_extra_sigset(&sc.lmask, &sc._unused[3], mask); 161 sc.lregs = (struct linux_pt_regs*)fp; 162 163 /* 164 * Setup the signal stack frame as Linux does it in 165 * arch/ppc/kernel/signal.c:setup_frame() 166 * 167 * Save register context. 168 */ 169 for (i = 0; i < 32; i++) 170 linux_regs.lgpr[i] = tf->fixreg[i]; 171 linux_regs.lnip = tf->srr0; 172 linux_regs.lmsr = tf->srr1 & PSL_USERSRR1; 173 linux_regs.lorig_gpr3 = tf->fixreg[3]; /* XXX Is that right? 
*/ 174 linux_regs.lctr = tf->ctr; 175 linux_regs.llink = tf->lr; 176 linux_regs.lxer = tf->xer; 177 linux_regs.lccr = tf->cr; 178 linux_regs.lmq = 0; /* Unused, 601 only */ 179 linux_regs.ltrap = tf->exc; 180 linux_regs.ldar = tf->dar; 181 linux_regs.ldsisr = tf->dsisr; 182 linux_regs.lresult = 0; 183 184 memset(&frame, 0, sizeof(frame)); 185 memcpy(&frame.lgp_regs, &linux_regs, sizeof(linux_regs)); 186 187 save_fpu_lwp(curlwp, FPU_SAVE); 188 memcpy(&frame.lfp_regs, curpcb->pcb_fpu.fpreg, sizeof(frame.lfp_regs)); 189 190 /* 191 * Copy Linux's signal trampoline on the user stack It should not 192 * be used, but Linux binaries might expect it to be there. 193 */ 194 frame.ltramp[0] = 0x38997777; /* li r0, 0x7777 */ 195 frame.ltramp[1] = 0x44000002; /* sc */ 196 197 /* 198 * Move it to the user stack 199 * There is a little trick here, about the LINUX_ABIGAP: the 200 * linux_sigreg structure has a 56 int gap to support rs6000/xcoff 201 * binaries. But the Linux kernel seems to do without it, and it 202 * just skip it when building the stack frame. Hence the LINUX_ABIGAP. 203 */ 204 sendsig_reset(l, sig); 205 mutex_exit(p->p_lock); 206 error = copyout(&frame, (void *)fp, sizeof (frame) - LINUX_ABIGAP); 207 208 if (error != 0) { 209 /* 210 * Process has trashed its stack; give it an illegal 211 * instruction to halt it in its tracks. 212 */ 213 mutex_enter(p->p_lock); 214 sigexit(l, SIGILL); 215 /* NOTREACHED */ 216 } 217 218 /* 219 * Add a sigcontext on the stack 220 */ 221 fp -= sizeof(struct linux_sigcontext); 222 error = copyout(&sc, (void *)fp, sizeof (struct linux_sigcontext)); 223 mutex_enter(p->p_lock); 224 225 if (error != 0) { 226 /* 227 * Process has trashed its stack; give it an illegal 228 * instruction to halt it in its tracks. 229 */ 230 sigexit(l, SIGILL); 231 /* NOTREACHED */ 232 } 233 234 /* 235 * Set the registers according to how the Linux process expects them. 236 * "Mind the gap" Linux expects a gap here. 
237 */ 238 tf->fixreg[1] = fp - LINUX__SIGNAL_FRAMESIZE; 239 tf->lr = (int)catcher; 240 tf->fixreg[3] = (int)native_to_linux_signo[sig]; 241 tf->fixreg[4] = fp; 242 tf->srr0 = (int)p->p_sigctx.ps_sigcode; 243 244 #ifdef DEBUG_LINUX 245 printf("fp at end of linux_sendsig = %x\n", fp); 246 #endif 247 /* 248 * Remember that we're now on the signal stack. 249 */ 250 if (onstack) 251 l->l_sigstk.ss_flags |= SS_ONSTACK; 252 #ifdef DEBUG_LINUX 253 printf("linux_sendsig: exitting. fp=0x%lx\n",(long)fp); 254 #endif 255 } 256 257 /* 258 * System call to cleanup state after a signal 259 * has been taken. Reset signal mask and 260 * stack state from context left by sendsig (above). 261 * Return to previous pc and psl as specified by 262 * context left by sendsig. Check carefully to 263 * make sure that the user has not modified the 264 * psl to gain improper privileges or to cause 265 * a machine fault. 266 * 267 * XXX not tested 268 */ 269 int 270 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval) 271 { 272 /* { 273 syscallarg(struct linux_rt_sigframe *) sfp; 274 } */ 275 struct proc *p = l->l_proc; 276 struct linux_rt_sigframe *scp, sigframe; 277 struct linux_sigregs sregs; 278 struct linux_pt_regs *lregs; 279 struct trapframe *tf; 280 sigset_t mask; 281 int i; 282 283 /* 284 * The trampoline code hands us the context. 285 * It is unsafe to keep track of it ourselves, in the event that a 286 * program jumps out of a signal handler. 287 */ 288 scp = SCARG(uap, sfp); 289 290 /* 291 * Get the context from user stack 292 */ 293 if (copyin((void *)scp, &sigframe, sizeof(*scp))) 294 return (EFAULT); 295 296 /* 297 * Restore register context. 
298 */ 299 if (copyin((void *)sigframe.luc.luc_context.lregs, 300 &sregs, sizeof(sregs))) 301 return (EFAULT); 302 lregs = (struct linux_pt_regs *)&sregs.lgp_regs; 303 304 tf = trapframe(l); 305 #ifdef DEBUG_LINUX 306 (unsigned long)tf, (unsigned long)scp); 307 #endif 308 309 if (!PSL_USEROK_P(lregs->lmsr)) 310 return (EINVAL); 311 312 for (i = 0; i < 32; i++) 313 tf->fixreg[i] = lregs->lgpr[i]; 314 tf->lr = lregs->llink; 315 tf->cr = lregs->lccr; 316 tf->xer = lregs->lxer; 317 tf->ctr = lregs->lctr; 318 tf->srr0 = lregs->lnip; 319 tf->srr1 = lregs->lmsr; 320 321 /* 322 * Make sure the fpu state is discarded 323 */ 324 save_fpu_lwp(curlwp, FPU_DISCARD); 325 326 memcpy(curpcb->pcb_fpu.fpreg, (void *)&sregs.lfp_regs, 327 sizeof(curpcb->pcb_fpu.fpreg)); 328 329 mutex_enter(p->p_lock); 330 331 /* 332 * Restore signal stack. 333 * 334 * XXX cannot find the onstack information in Linux sig context. 335 * Is signal stack really supported on Linux? 336 * 337 * It seems to be supported in libc6... 338 */ 339 /* if (sc.sc_onstack & SS_ONSTACK) 340 l->l_sigstk.ss_flags |= SS_ONSTACK; 341 else */ 342 l->l_sigstk.ss_flags &= ~SS_ONSTACK; 343 344 /* 345 * Grab the signal mask 346 */ 347 linux_to_native_sigset(&mask, &sigframe.luc.luc_sigmask); 348 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 349 350 mutex_exit(p->p_lock); 351 352 return (EJUSTRETURN); 353 } 354 355 356 /* 357 * The following needs code review for potential security issues 358 */ 359 int 360 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval) 361 { 362 /* { 363 syscallarg(struct linux_sigcontext *) scp; 364 } */ 365 struct proc *p = l->l_proc; 366 struct linux_sigcontext *scp, context; 367 struct linux_sigregs sregs; 368 struct linux_pt_regs *lregs; 369 struct trapframe *tf; 370 sigset_t mask; 371 int i; 372 373 /* 374 * The trampoline code hands us the context. 
375 * It is unsafe to keep track of it ourselves, in the event that a 376 * program jumps out of a signal handler. 377 */ 378 scp = SCARG(uap, scp); 379 380 /* 381 * Get the context from user stack 382 */ 383 if (copyin(scp, &context, sizeof(*scp))) 384 return (EFAULT); 385 386 /* 387 * Restore register context. 388 */ 389 if (copyin((void *)context.lregs, &sregs, sizeof(sregs))) 390 return (EFAULT); 391 lregs = (struct linux_pt_regs *)&sregs.lgp_regs; 392 393 tf = trapframe(l); 394 #ifdef DEBUG_LINUX 395 printf("linux_sys_sigreturn: trapframe=0x%lx scp=0x%lx\n", 396 (unsigned long)tf, (unsigned long)scp); 397 #endif 398 399 if (!PSL_USEROK_P(lregs->lmsr)) 400 return (EINVAL); 401 402 for (i = 0; i < 32; i++) 403 tf->fixreg[i] = lregs->lgpr[i]; 404 tf->lr = lregs->llink; 405 tf->cr = lregs->lccr; 406 tf->xer = lregs->lxer; 407 tf->ctr = lregs->lctr; 408 tf->srr0 = lregs->lnip; 409 tf->srr1 = lregs->lmsr; 410 411 /* 412 * Make sure the fpu state is discarded 413 */ 414 save_fpu_lwp(curlwp, FPU_DISCARD); 415 416 memcpy(curpcb->pcb_fpu.fpreg, (void *)&sregs.lfp_regs, 417 sizeof(curpcb->pcb_fpu.fpreg)); 418 419 mutex_enter(p->p_lock); 420 421 /* 422 * Restore signal stack. 423 * 424 * XXX cannot find the onstack information in Linux sig context. 425 * Is signal stack really supported on Linux? 426 */ 427 #if 0 428 if (sc.sc_onstack & SS_ONSTACK) 429 l->l_sigstk.ss_flags |= SS_ONSTACK; 430 else 431 #endif 432 l->l_sigstk.ss_flags &= ~SS_ONSTACK; 433 434 /* Restore signal mask. 
*/ 435 linux_old_extra_to_native_sigset(&mask, &context.lmask, 436 &context._unused[3]); 437 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 438 439 mutex_exit(p->p_lock); 440 441 return (EJUSTRETURN); 442 } 443 444 445 #if 0 446 int 447 linux_sys_modify_ldt(struct proc *p, void *v, register_t *retval) 448 { 449 /* 450 * This syscall is not implemented in Linux/PowerPC: we should not 451 * be here 452 */ 453 #ifdef DEBUG_LINUX 454 printf("linux_sys_modify_ldt: should not be here.\n"); 455 #endif 456 return 0; 457 } 458 #endif 459 460 /* 461 * major device numbers remapping 462 */ 463 dev_t 464 linux_fakedev(dev_t dev, int raw) 465 { 466 /* XXX write me */ 467 return dev; 468 } 469 470 /* 471 * We come here in a last attempt to satisfy a Linux ioctl() call 472 */ 473 int 474 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval) 475 { 476 /* { 477 syscallarg(int) fd; 478 syscallarg(u_long) com; 479 syscallarg(void *) data; 480 } */ 481 struct sys_ioctl_args bia; 482 u_long com; 483 484 SCARG(&bia, fd) = SCARG(uap, fd); 485 SCARG(&bia, data) = SCARG(uap, data); 486 com = SCARG(uap, com); 487 488 switch (com) { 489 default: 490 printf("linux_machdepioctl: invalid ioctl %08lx\n", com); 491 return EINVAL; 492 } 493 SCARG(&bia, com) = com; 494 /* XXX NJWLWP */ 495 return sys_ioctl(curlwp, &bia, retval); 496 } 497 #if 0 498 /* 499 * Set I/O permissions for a process. Just set the maximum level 500 * right away (ignoring the argument), otherwise we would have 501 * to rely on I/O permission maps, which are not implemented. 502 */ 503 int 504 linux_sys_iopl(struct lwp *l, const void *v, register_t *retval) 505 { 506 /* 507 * This syscall is not implemented in Linux/PowerPC: we should not be here 508 */ 509 #ifdef DEBUG_LINUX 510 printf("linux_sys_iopl: should not be here.\n"); 511 #endif 512 return 0; 513 } 514 #endif 515 516 /* 517 * See above. 
If a root process tries to set access to an I/O port, 518 * just let it have the whole range. 519 */ 520 int 521 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval) 522 { 523 /* 524 * This syscall is not implemented in Linux/PowerPC: we should not be here 525 */ 526 #ifdef DEBUG_LINUX 527 printf("linux_sys_ioperm: should not be here.\n"); 528 #endif 529 return 0; 530 } 531 532 /* 533 * wrapper linux_sys_new_uname() -> linux_sys_uname() 534 */ 535 int 536 linux_sys_new_uname(struct lwp *l, const struct linux_sys_new_uname_args *uap, register_t *retval) 537 { 538 return linux_sys_uname(l, (const void *)uap, retval); 539 } 540 541 /* 542 * wrapper linux_sys_new_select() -> linux_sys_select() 543 */ 544 int 545 linux_sys_new_select(struct lwp *l, const struct linux_sys_new_select_args *uap, register_t *retval) 546 { 547 return linux_sys_select(l, (const void *)uap, retval); 548 } 549 550 int 551 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg) 552 { 553 return 0; 554 } 555