1 /* $NetBSD: linux_machdep.c,v 1.158 2014/02/19 21:23:02 dsl Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.158 2014/02/19 21:23:02 dsl Exp $"); 34 35 #if defined(_KERNEL_OPT) 36 #include "opt_vm86.h" 37 #include "opt_user_ldt.h" 38 #endif 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/signalvar.h> 43 #include <sys/kernel.h> 44 #include <sys/proc.h> 45 #include <sys/buf.h> 46 #include <sys/reboot.h> 47 #include <sys/conf.h> 48 #include <sys/exec.h> 49 #include <sys/file.h> 50 #include <sys/callout.h> 51 #include <sys/malloc.h> 52 #include <sys/mbuf.h> 53 #include <sys/msgbuf.h> 54 #include <sys/mount.h> 55 #include <sys/vnode.h> 56 #include <sys/device.h> 57 #include <sys/syscallargs.h> 58 #include <sys/filedesc.h> 59 #include <sys/exec_elf.h> 60 #include <sys/disklabel.h> 61 #include <sys/ioctl.h> 62 #include <sys/wait.h> 63 #include <sys/kauth.h> 64 #include <sys/kmem.h> 65 66 #include <miscfs/specfs/specdev.h> 67 68 #include <compat/linux/common/linux_types.h> 69 #include <compat/linux/common/linux_signal.h> 70 #include <compat/linux/common/linux_util.h> 71 #include <compat/linux/common/linux_ioctl.h> 72 #include <compat/linux/common/linux_hdio.h> 73 #include <compat/linux/common/linux_exec.h> 74 #include <compat/linux/common/linux_machdep.h> 75 #include <compat/linux/common/linux_errno.h> 76 77 #include <compat/linux/linux_syscallargs.h> 78 79 #include <sys/cpu.h> 80 #include <machine/cpufunc.h> 81 #include <machine/psl.h> 82 #include <machine/reg.h> 83 #include <machine/segments.h> 84 #include <machine/specialreg.h> 85 #include <machine/sysarch.h> 86 #include <machine/vm86.h> 87 #include <machine/vmparam.h> 88 89 #include <x86/fpu.h> 90 91 /* 92 * To see whether wscons is configured (for virtual console ioctl calls). 93 */ 94 #if defined(_KERNEL_OPT) 95 #include "wsdisplay.h" 96 #endif 97 #if (NWSDISPLAY > 0) 98 #include <dev/wscons/wsconsio.h> 99 #include <dev/wscons/wsdisplay_usl_io.h> 100 #if defined(_KERNEL_OPT) 101 #include "opt_xserver.h" 102 #endif 103 #endif 104 105 #ifdef DEBUG_LINUX 106 #define DPRINTF(a) uprintf a 107 #else 108 #define DPRINTF(a) 109 #endif 110 111 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *); 112 extern struct disklist *x86_alldisks; 113 static void linux_save_ucontext(struct lwp *, struct trapframe *, 114 const sigset_t *, struct sigaltstack *, struct linux_ucontext *); 115 static void linux_save_sigcontext(struct lwp *, struct trapframe *, 116 const sigset_t *, struct linux_sigcontext *); 117 static int linux_restore_sigcontext(struct lwp *, 118 struct linux_sigcontext *, register_t *); 119 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *); 120 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *); 121 122 extern char linux_sigcode[], linux_rt_sigcode[]; 123 124 /* 125 * Deal with some i386-specific things in the Linux emulation code. 126 */ 127 128 void 129 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack) 130 { 131 struct trapframe *tf; 132 133 #ifdef USER_LDT 134 pmap_ldt_cleanup(l); 135 #endif 136 137 fpu_save_area_clear(l, __Linux_NPXCW__); 138 139 tf = l->l_md.md_regs; 140 tf->tf_gs = 0; 141 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 142 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 143 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 144 tf->tf_edi = 0; 145 tf->tf_esi = 0; 146 tf->tf_ebp = 0; 147 tf->tf_ebx = l->l_proc->p_psstrp; 148 tf->tf_edx = 0; 149 tf->tf_ecx = 0; 150 tf->tf_eax = 0; 151 tf->tf_eip = epp->ep_entry; 152 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 153 tf->tf_eflags = PSL_USERSET; 154 tf->tf_esp = stack; 155 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 156 } 157 158 /* 159 * Send an interrupt to process. 160 * 161 * Stack is set up to allow sigcode stored 162 * in u. to call routine, followed by kcall 163 * to sigreturn routine below. After sigreturn 164 * resets the signal mask, the stack, and the 165 * frame pointer, it returns to the user 166 * specified pc, psl. 167 */ 168 169 void 170 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 171 { 172 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO) 173 linux_rt_sendsig(ksi, mask); 174 else 175 linux_old_sendsig(ksi, mask); 176 } 177 178 179 static void 180 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc) 181 { 182 uc->uc_flags = 0; 183 uc->uc_link = NULL; 184 native_to_linux_sigaltstack(&uc->uc_stack, sas); 185 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext); 186 native_to_linux_sigset(&uc->uc_sigmask, mask); 187 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem)); 188 } 189 190 static void 191 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, 192 const sigset_t *mask, struct linux_sigcontext *sc) 193 { 194 struct pcb *pcb = lwp_getpcb(l); 195 196 /* Save register context. */ 197 #ifdef VM86 198 if (tf->tf_eflags & PSL_VM) { 199 sc->sc_gs = tf->tf_vm86_gs; 200 sc->sc_fs = tf->tf_vm86_fs; 201 sc->sc_es = tf->tf_vm86_es; 202 sc->sc_ds = tf->tf_vm86_ds; 203 sc->sc_eflags = get_vflags(l); 204 } else 205 #endif 206 { 207 sc->sc_gs = tf->tf_gs; 208 sc->sc_fs = tf->tf_fs; 209 sc->sc_es = tf->tf_es; 210 sc->sc_ds = tf->tf_ds; 211 sc->sc_eflags = tf->tf_eflags; 212 } 213 sc->sc_edi = tf->tf_edi; 214 sc->sc_esi = tf->tf_esi; 215 sc->sc_esp = tf->tf_esp; 216 sc->sc_ebp = tf->tf_ebp; 217 sc->sc_ebx = tf->tf_ebx; 218 sc->sc_edx = tf->tf_edx; 219 sc->sc_ecx = tf->tf_ecx; 220 sc->sc_eax = tf->tf_eax; 221 sc->sc_eip = tf->tf_eip; 222 sc->sc_cs = tf->tf_cs; 223 sc->sc_esp_at_signal = tf->tf_esp; 224 sc->sc_ss = tf->tf_ss; 225 sc->sc_err = tf->tf_err; 226 sc->sc_trapno = tf->tf_trapno; 227 sc->sc_cr2 = pcb->pcb_cr2; 228 sc->sc_387 = NULL; 229 230 /* Save signal stack. */ 231 /* Linux doesn't save the onstack flag in sigframe */ 232 233 /* Save signal mask. */ 234 native_to_linux_old_sigset(&sc->sc_mask, mask); 235 } 236 237 static void 238 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 239 { 240 struct lwp *l = curlwp; 241 struct proc *p = l->l_proc; 242 struct trapframe *tf; 243 struct linux_rt_sigframe *fp, frame; 244 int onstack, error; 245 int sig = ksi->ksi_signo; 246 sig_t catcher = SIGACTION(p, sig).sa_handler; 247 struct sigaltstack *sas = &l->l_sigstk; 248 249 tf = l->l_md.md_regs; 250 /* Do we need to jump onto the signal stack? */ 251 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 252 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 253 254 255 /* Allocate space for the signal handler context. */ 256 if (onstack) 257 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp + 258 sas->ss_size); 259 else 260 fp = (struct linux_rt_sigframe *)tf->tf_esp; 261 fp--; 262 263 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 264 onstack, fp, sig, tf->tf_eip, 265 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 266 267 /* Build stack frame for signal trampoline. */ 268 frame.sf_handler = catcher; 269 frame.sf_sig = native_to_linux_signo[sig]; 270 frame.sf_sip = &fp->sf_si; 271 frame.sf_ucp = &fp->sf_uc; 272 273 /* 274 * XXX: the following code assumes that the constants for 275 * siginfo are the same between linux and NetBSD. 276 */ 277 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info); 278 279 /* Save register context. */ 280 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc); 281 sendsig_reset(l, sig); 282 283 mutex_exit(p->p_lock); 284 error = copyout(&frame, fp, sizeof(frame)); 285 mutex_enter(p->p_lock); 286 287 if (error != 0) { 288 /* 289 * Process has trashed its stack; give it an illegal 290 * instruction to halt it in its tracks. 291 */ 292 sigexit(l, SIGILL); 293 /* NOTREACHED */ 294 } 295 296 /* 297 * Build context to run handler in. 298 */ 299 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 300 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 301 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 302 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) + 303 (linux_rt_sigcode - linux_sigcode); 304 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 305 tf->tf_eflags &= ~PSL_CLEARSIG; 306 tf->tf_esp = (int)fp; 307 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 308 309 /* Remember that we're now on the signal stack. */ 310 if (onstack) 311 sas->ss_flags |= SS_ONSTACK; 312 } 313 314 static void 315 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 316 { 317 struct lwp *l = curlwp; 318 struct proc *p = l->l_proc; 319 struct trapframe *tf; 320 struct linux_sigframe *fp, frame; 321 int onstack, error; 322 int sig = ksi->ksi_signo; 323 sig_t catcher = SIGACTION(p, sig).sa_handler; 324 struct sigaltstack *sas = &l->l_sigstk; 325 326 tf = l->l_md.md_regs; 327 328 /* Do we need to jump onto the signal stack? */ 329 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 330 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 331 332 /* Allocate space for the signal handler context. */ 333 if (onstack) 334 fp = (struct linux_sigframe *) ((char *)sas->ss_sp + 335 sas->ss_size); 336 else 337 fp = (struct linux_sigframe *)tf->tf_esp; 338 fp--; 339 340 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 341 onstack, fp, sig, tf->tf_eip, 342 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 343 344 /* Build stack frame for signal trampoline. */ 345 frame.sf_handler = catcher; 346 frame.sf_sig = native_to_linux_signo[sig]; 347 348 linux_save_sigcontext(l, tf, mask, &frame.sf_sc); 349 sendsig_reset(l, sig); 350 351 mutex_exit(p->p_lock); 352 error = copyout(&frame, fp, sizeof(frame)); 353 mutex_enter(p->p_lock); 354 355 if (error != 0) { 356 /* 357 * Process has trashed its stack; give it an illegal 358 * instruction to halt it in its tracks. 359 */ 360 sigexit(l, SIGILL); 361 /* NOTREACHED */ 362 } 363 364 /* 365 * Build context to run handler in. 366 */ 367 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 368 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 369 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 370 tf->tf_eip = (int)p->p_sigctx.ps_sigcode; 371 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 372 tf->tf_eflags &= ~PSL_CLEARSIG; 373 tf->tf_esp = (int)fp; 374 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 375 376 /* Remember that we're now on the signal stack. */ 377 if (onstack) 378 sas->ss_flags |= SS_ONSTACK; 379 } 380 381 /* 382 * System call to cleanup state after a signal 383 * has been taken. Reset signal mask and 384 * stack state from context left by sendsig (above). 385 * Return to previous pc and psl as specified by 386 * context left by sendsig. Check carefully to 387 * make sure that the user has not modified the 388 * psl to gain improper privileges or to cause 389 * a machine fault. 390 */ 391 int 392 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval) 393 { 394 /* { 395 syscallarg(struct linux_ucontext *) ucp; 396 } */ 397 struct linux_ucontext context, *ucp = SCARG(uap, ucp); 398 int error; 399 400 /* 401 * The trampoline code hands us the context. 402 * It is unsafe to keep track of it ourselves, in the event that a 403 * program jumps out of a signal handler. 404 */ 405 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0) 406 return error; 407 408 /* XXX XAX we can do better here by using more of the ucontext */ 409 return linux_restore_sigcontext(l, &context.uc_mcontext, retval); 410 } 411 412 int 413 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval) 414 { 415 /* { 416 syscallarg(struct linux_sigcontext *) scp; 417 } */ 418 struct linux_sigcontext context, *scp = SCARG(uap, scp); 419 int error; 420 421 /* 422 * The trampoline code hands us the context. 423 * It is unsafe to keep track of it ourselves, in the event that a 424 * program jumps out of a signal handler. 425 */ 426 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0) 427 return error; 428 return linux_restore_sigcontext(l, &context, retval); 429 } 430 431 static int 432 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp, 433 register_t *retval) 434 { 435 struct proc *p = l->l_proc; 436 struct sigaltstack *sas = &l->l_sigstk; 437 struct trapframe *tf; 438 sigset_t mask; 439 ssize_t ss_gap; 440 441 /* Restore register context. */ 442 tf = l->l_md.md_regs; 443 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 444 445 #ifdef VM86 446 if (scp->sc_eflags & PSL_VM) { 447 void syscall_vm86(struct trapframe *); 448 449 tf->tf_vm86_gs = scp->sc_gs; 450 tf->tf_vm86_fs = scp->sc_fs; 451 tf->tf_vm86_es = scp->sc_es; 452 tf->tf_vm86_ds = scp->sc_ds; 453 set_vflags(l, scp->sc_eflags); 454 p->p_md.md_syscall = syscall_vm86; 455 } else 456 #endif 457 { 458 /* 459 * Check for security violations. If we're returning to 460 * protected mode, the CPU will validate the segment registers 461 * automatically and generate a trap on violations. We handle 462 * the trap, rather than doing all of the checking here. 463 */ 464 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 || 465 !USERMODE(scp->sc_cs, scp->sc_eflags)) 466 return EINVAL; 467 468 tf->tf_gs = scp->sc_gs; 469 tf->tf_fs = scp->sc_fs; 470 tf->tf_es = scp->sc_es; 471 tf->tf_ds = scp->sc_ds; 472 #ifdef VM86 473 if (tf->tf_eflags & PSL_VM) 474 (*p->p_emul->e_syscall_intern)(p); 475 #endif 476 tf->tf_eflags = scp->sc_eflags; 477 } 478 tf->tf_edi = scp->sc_edi; 479 tf->tf_esi = scp->sc_esi; 480 tf->tf_ebp = scp->sc_ebp; 481 tf->tf_ebx = scp->sc_ebx; 482 tf->tf_edx = scp->sc_edx; 483 tf->tf_ecx = scp->sc_ecx; 484 tf->tf_eax = scp->sc_eax; 485 tf->tf_eip = scp->sc_eip; 486 tf->tf_cs = scp->sc_cs; 487 tf->tf_esp = scp->sc_esp_at_signal; 488 tf->tf_ss = scp->sc_ss; 489 490 /* Restore signal stack. */ 491 /* 492 * Linux really does it this way; it doesn't have space in sigframe 493 * to save the onstack flag. 494 */ 495 mutex_enter(p->p_lock); 496 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp); 497 if (ss_gap >= 0 && ss_gap < sas->ss_size) 498 sas->ss_flags |= SS_ONSTACK; 499 else 500 sas->ss_flags &= ~SS_ONSTACK; 501 502 /* Restore signal mask. */ 503 linux_old_to_native_sigset(&mask, &scp->sc_mask); 504 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 505 mutex_exit(p->p_lock); 506 507 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 508 return EJUSTRETURN; 509 } 510 511 #ifdef USER_LDT 512 513 static int 514 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 515 register_t *retval) 516 { 517 struct x86_get_ldt_args gl; 518 int error; 519 union descriptor *ldt_buf; 520 size_t sz; 521 522 /* 523 * I've checked the linux code - this function is asymetric with 524 * linux_write_ldt, and returns raw ldt entries. 525 * NB, the code I saw zerod the spare parts of the user buffer. 526 */ 527 528 DPRINTF(("linux_read_ldt!")); 529 530 sz = 8192 * sizeof(*ldt_buf); 531 ldt_buf = kmem_zalloc(sz, KM_SLEEP); 532 gl.start = 0; 533 gl.desc = NULL; 534 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor); 535 error = x86_get_ldt1(l, &gl, ldt_buf); 536 /* NB gl.num might have changed */ 537 if (error == 0) { 538 *retval = gl.num * sizeof *ldt; 539 error = copyout(ldt_buf, SCARG(uap, ptr), 540 gl.num * sizeof *ldt_buf); 541 } 542 kmem_free(ldt_buf, sz); 543 544 return error; 545 } 546 547 struct linux_ldt_info { 548 u_int entry_number; 549 u_long base_addr; 550 u_int limit; 551 u_int seg_32bit:1; 552 u_int contents:2; 553 u_int read_exec_only:1; 554 u_int limit_in_pages:1; 555 u_int seg_not_present:1; 556 u_int useable:1; 557 }; 558 559 static int 560 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 561 int oldmode) 562 { 563 struct linux_ldt_info ldt_info; 564 union descriptor d; 565 struct x86_set_ldt_args sl; 566 int error; 567 568 DPRINTF(("linux_write_ldt %d\n", oldmode)); 569 if (SCARG(uap, bytecount) != sizeof(ldt_info)) 570 return (EINVAL); 571 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0) 572 return error; 573 if (ldt_info.entry_number >= 8192) 574 return (EINVAL); 575 if (ldt_info.contents == 3) { 576 if (oldmode) 577 return (EINVAL); 578 if (ldt_info.seg_not_present) 579 return (EINVAL); 580 } 581 582 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && 583 (oldmode || (ldt_info.contents == 0 && 584 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 && 585 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 && 586 ldt_info.useable == 0))) { 587 /* this means you should zero the ldt */ 588 (void)memset(&d, 0, sizeof(d)); 589 } else { 590 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff; 591 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff; 592 d.sd.sd_lolimit = ldt_info.limit & 0xffff; 593 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf; 594 d.sd.sd_type = 16 | (ldt_info.contents << 2) | 595 (!ldt_info.read_exec_only << 1); 596 d.sd.sd_dpl = SEL_UPL; 597 d.sd.sd_p = !ldt_info.seg_not_present; 598 d.sd.sd_def32 = ldt_info.seg_32bit; 599 d.sd.sd_gran = ldt_info.limit_in_pages; 600 if (!oldmode) 601 d.sd.sd_xx = ldt_info.useable; 602 else 603 d.sd.sd_xx = 0; 604 } 605 sl.start = ldt_info.entry_number; 606 sl.desc = NULL; 607 sl.num = 1; 608 609 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n", 610 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit)); 611 612 return x86_set_ldt1(l, &sl, &d); 613 } 614 615 #endif /* USER_LDT */ 616 617 int 618 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval) 619 { 620 /* { 621 syscallarg(int) func; 622 syscallarg(void *) ptr; 623 syscallarg(size_t) bytecount; 624 } */ 625 626 switch (SCARG(uap, func)) { 627 #ifdef USER_LDT 628 case 0: 629 return linux_read_ldt(l, (const void *)uap, retval); 630 case 1: 631 return linux_write_ldt(l, (const void *)uap, 1); 632 case 2: 633 #ifdef notyet 634 return linux_read_default_ldt(l, (const void *)uap, retval); 635 #else 636 return (ENOSYS); 637 #endif 638 case 0x11: 639 return linux_write_ldt(l, (const void *)uap, 0); 640 #endif /* USER_LDT */ 641 642 default: 643 return (ENOSYS); 644 } 645 } 646 647 /* 648 * XXX Pathetic hack to make svgalib work. This will fake the major 649 * device number of an opened VT so that svgalib likes it. grmbl. 650 * Should probably do it 'wrong the right way' and use a mapping 651 * array for all major device numbers, and map linux_mknod too. 652 */ 653 dev_t 654 linux_fakedev(dev_t dev, int raw) 655 { 656 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 657 const struct cdevsw *cd = cdevsw_lookup(dev); 658 659 if (raw) { 660 #if (NWSDISPLAY > 0) 661 extern const struct cdevsw wsdisplay_cdevsw; 662 if (cd == &wsdisplay_cdevsw) 663 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 664 #endif 665 } 666 667 if (cd == &ptc_cdevsw) 668 return makedev(LINUX_PTC_MAJOR, minor(dev)); 669 if (cd == &pts_cdevsw) 670 return makedev(LINUX_PTS_MAJOR, minor(dev)); 671 672 return dev; 673 } 674 675 #if (NWSDISPLAY > 0) 676 /* 677 * That's not complete, but enough to get an X server running. 678 */ 679 #define NR_KEYS 128 680 static const u_short plain_map[NR_KEYS] = { 681 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 682 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009, 683 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 684 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73, 685 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b, 686 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76, 687 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c, 688 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 689 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307, 690 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 691 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a, 692 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 693 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 694 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 695 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 696 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 697 }, shift_map[NR_KEYS] = { 698 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e, 699 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009, 700 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49, 701 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53, 702 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a, 703 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56, 704 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c, 705 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e, 706 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307, 707 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 708 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a, 709 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 710 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 711 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116, 712 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 713 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 714 }, altgr_map[NR_KEYS] = { 715 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200, 716 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200, 717 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 718 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73, 719 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200, 720 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76, 721 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c, 722 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510, 723 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911, 724 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b, 725 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516, 726 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 727 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 728 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 729 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 730 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 731 }, ctrl_map[NR_KEYS] = { 732 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e, 733 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200, 734 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009, 735 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013, 736 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200, 737 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016, 738 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c, 739 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 740 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307, 741 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 742 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a, 743 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 744 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 745 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 746 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 747 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 748 }; 749 750 const u_short * const linux_keytabs[] = { 751 plain_map, shift_map, altgr_map, altgr_map, ctrl_map 752 }; 753 #endif 754 755 static struct biosdisk_info * 756 fd2biosinfo(struct proc *p, struct file *fp) 757 { 758 struct vnode *vp; 759 const char *blkname; 760 char diskname[16]; 761 int i; 762 struct nativedisk_info *nip; 763 struct disklist *dl = x86_alldisks; 764 765 if (fp->f_type != DTYPE_VNODE) 766 return NULL; 767 vp = (struct vnode *)fp->f_data; 768 769 if (vp->v_type != VBLK) 770 return NULL; 771 772 blkname = devsw_blk2name(major(vp->v_rdev)); 773 snprintf(diskname, sizeof diskname, "%s%llu", blkname, 774 (unsigned long long)DISKUNIT(vp->v_rdev)); 775 776 for (i = 0; i < dl->dl_nnativedisks; i++) { 777 nip = &dl->dl_nativedisks[i]; 778 if (strcmp(diskname, nip->ni_devname)) 779 continue; 780 if (nip->ni_nmatches != 0) 781 return &dl->dl_biosdisks[nip->ni_biosmatches[0]]; 782 } 783 784 return NULL; 785 } 786 787 788 /* 789 * We come here in a last attempt to satisfy a Linux ioctl() call 790 */ 791 int 792 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval) 793 { 794 /* { 795 syscallarg(int) fd; 796 syscallarg(u_long) com; 797 syscallarg(void *) data; 798 } */ 799 struct sys_ioctl_args bia; 800 u_long com; 801 int error, error1; 802 #if (NWSDISPLAY > 0) 803 struct vt_mode lvt; 804 struct kbentry kbe; 805 #endif 806 struct linux_hd_geometry hdg; 807 struct linux_hd_big_geometry hdg_big; 808 struct biosdisk_info *bip; 809 file_t *fp; 810 int fd; 811 struct disklabel label, *labp; 812 struct partinfo partp; 813 int (*ioctlf)(struct file *, u_long, void *); 814 u_long start, biostotal, realtotal; 815 u_char heads, sectors; 816 u_int cylinders; 817 struct ioctl_pt pt; 818 819 fd = SCARG(uap, fd); 820 SCARG(&bia, fd) = fd; 821 SCARG(&bia, data) = SCARG(uap, data); 822 com = SCARG(uap, com); 823 824 if ((fp = fd_getfile(fd)) == NULL) 825 return (EBADF); 826 827 switch (com) { 828 #if (NWSDISPLAY > 0) 829 case LINUX_KDGKBMODE: 830 com = KDGKBMODE; 831 break; 832 case LINUX_KDSKBMODE: 833 com = KDSKBMODE; 834 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW) 835 SCARG(&bia, data) = (void *)K_RAW; 836 break; 837 case LINUX_KIOCSOUND: 838 SCARG(&bia, data) = 839 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff); 840 /* fall through */ 841 case LINUX_KDMKTONE: 842 com = KDMKTONE; 843 break; 844 case LINUX_KDSETMODE: 845 com = KDSETMODE; 846 break; 847 case LINUX_KDGETMODE: 848 /* KD_* values are equal to the wscons numbers */ 849 com = WSDISPLAYIO_GMODE; 850 break; 851 case LINUX_KDENABIO: 852 com = KDENABIO; 853 break; 854 case LINUX_KDDISABIO: 855 com = KDDISABIO; 856 break; 857 case LINUX_KDGETLED: 858 com = KDGETLED; 859 break; 860 case LINUX_KDSETLED: 861 com = KDSETLED; 862 break; 863 case LINUX_VT_OPENQRY: 864 com = VT_OPENQRY; 865 break; 866 case LINUX_VT_GETMODE: 867 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt); 868 if (error != 0) 869 goto out; 870 lvt.relsig = native_to_linux_signo[lvt.relsig]; 871 lvt.acqsig = native_to_linux_signo[lvt.acqsig]; 872 lvt.frsig = native_to_linux_signo[lvt.frsig]; 873 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt)); 874 goto out; 875 case LINUX_VT_SETMODE: 876 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt)); 877 if (error != 0) 878 goto out; 879 lvt.relsig = linux_to_native_signo[lvt.relsig]; 880 lvt.acqsig = linux_to_native_signo[lvt.acqsig]; 881 lvt.frsig = linux_to_native_signo[lvt.frsig]; 882 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt); 883 goto out; 884 case LINUX_VT_DISALLOCATE: 885 /* XXX should use WSDISPLAYIO_DELSCREEN */ 886 error = 0; 887 goto out; 888 case LINUX_VT_RELDISP: 889 com = VT_RELDISP; 890 break; 891 case LINUX_VT_ACTIVATE: 892 com = VT_ACTIVATE; 893 break; 894 case LINUX_VT_WAITACTIVE: 895 com = VT_WAITACTIVE; 896 break; 897 case LINUX_VT_GETSTATE: 898 com = VT_GETSTATE; 899 break; 900 case LINUX_KDGKBTYPE: 901 { 902 static const u_int8_t kb101 = KB_101; 903 904 /* This is what Linux does. */ 905 error = copyout(&kb101, SCARG(uap, data), 1); 906 goto out; 907 } 908 case LINUX_KDGKBENT: 909 /* 910 * The Linux KDGKBENT ioctl is different from the 911 * SYSV original. So we handle it in machdep code. 912 * XXX We should use keyboard mapping information 913 * from wsdisplay, but this would be expensive. 914 */ 915 if ((error = copyin(SCARG(uap, data), &kbe, 916 sizeof(struct kbentry)))) 917 goto out; 918 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *) 919 || kbe.kb_index >= NR_KEYS) { 920 error = EINVAL; 921 goto out; 922 } 923 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index]; 924 error = copyout(&kbe, SCARG(uap, data), 925 sizeof(struct kbentry)); 926 goto out; 927 #endif 928 case LINUX_HDIO_GETGEO: 929 case LINUX_HDIO_GETGEO_BIG: 930 /* 931 * Try to mimic Linux behaviour: return the BIOS geometry 932 * if possible (extending its # of cylinders if it's beyond 933 * the 1023 limit), fall back to the MI geometry (i.e. 934 * the real geometry) if not found, by returning an 935 * error. See common/linux_hdio.c 936 */ 937 bip = fd2biosinfo(curproc, fp); 938 ioctlf = fp->f_ops->fo_ioctl; 939 error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label); 940 error1 = ioctlf(fp, DIOCGPART, (void *)&partp); 941 if (error != 0 && error1 != 0) { 942 error = error1; 943 goto out; 944 } 945 labp = error != 0 ? &label : partp.disklab; 946 start = error1 != 0 ? partp.part->p_offset : 0; 947 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0 948 && bip->bi_cyl != 0) { 949 heads = bip->bi_head; 950 sectors = bip->bi_sec; 951 cylinders = bip->bi_cyl; 952 biostotal = heads * sectors * cylinders; 953 realtotal = labp->d_ntracks * labp->d_nsectors * 954 labp->d_ncylinders; 955 if (realtotal > biostotal) 956 cylinders = realtotal / (heads * sectors); 957 } else { 958 heads = labp->d_ntracks; 959 cylinders = labp->d_ncylinders; 960 sectors = labp->d_nsectors; 961 } 962 if (com == LINUX_HDIO_GETGEO) { 963 hdg.start = start; 964 hdg.heads = heads; 965 hdg.cylinders = cylinders; 966 hdg.sectors = sectors; 967 error = copyout(&hdg, SCARG(uap, data), sizeof hdg); 968 goto out; 969 } else { 970 hdg_big.start = start; 971 hdg_big.heads = heads; 972 hdg_big.cylinders = cylinders; 973 hdg_big.sectors = sectors; 974 error = copyout(&hdg_big, SCARG(uap, data), 975 sizeof hdg_big); 976 goto out; 977 } 978 979 default: 980 /* 981 * Unknown to us. If it's on a device, just pass it through 982 * using PTIOCLINUX, the device itself might be able to 983 * make some sense of it. 984 * XXX hack: if the function returns EJUSTRETURN, 985 * it has stuffed a sysctl return value in pt.data. 986 */ 987 ioctlf = fp->f_ops->fo_ioctl; 988 pt.com = SCARG(uap, com); 989 pt.data = SCARG(uap, data); 990 error = ioctlf(fp, PTIOCLINUX, &pt); 991 if (error == EJUSTRETURN) { 992 retval[0] = (register_t)pt.data; 993 error = 0; 994 } 995 996 if (error == ENOTTY) { 997 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n", 998 com)); 999 } 1000 goto out; 1001 } 1002 SCARG(&bia, com) = com; 1003 error = sys_ioctl(curlwp, &bia, retval); 1004 out: 1005 fd_putfile(fd); 1006 return error; 1007 } 1008 1009 /* 1010 * Set I/O permissions for a process. Just set the maximum level 1011 * right away (ignoring the argument), otherwise we would have 1012 * to rely on I/O permission maps, which are not implemented. 1013 */ 1014 int 1015 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval) 1016 { 1017 /* { 1018 syscallarg(int) level; 1019 } */ 1020 struct trapframe *fp = l->l_md.md_regs; 1021 1022 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL, 1023 NULL, NULL, NULL, NULL) != 0) 1024 return EPERM; 1025 fp->tf_eflags |= PSL_IOPL; 1026 *retval = 0; 1027 return 0; 1028 } 1029 1030 /* 1031 * See above. If a root process tries to set access to an I/O port, 1032 * just let it have the whole range. 1033 */ 1034 int 1035 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval) 1036 { 1037 /* { 1038 syscallarg(unsigned int) lo; 1039 syscallarg(unsigned int) hi; 1040 syscallarg(int) val; 1041 } */ 1042 struct trapframe *fp = l->l_md.md_regs; 1043 1044 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ? 1045 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL, 1046 NULL, NULL) != 0) 1047 return EPERM; 1048 if (SCARG(uap, val)) 1049 fp->tf_eflags |= PSL_IOPL; 1050 *retval = 0; 1051 return 0; 1052 } 1053 1054 int 1055 linux_usertrap(struct lwp *l, vaddr_t trapaddr, 1056 void *arg) 1057 { 1058 return 0; 1059 } 1060 1061 const char * 1062 linux_get_uname_arch(void) 1063 { 1064 static char uname_arch[5] = "i386"; 1065 1066 if (uname_arch[1] == '3') 1067 uname_arch[1] += cpu_class; 1068 return uname_arch; 1069 } 1070