1 /* $NetBSD: linux_machdep.c,v 1.159 2014/11/09 17:48:07 maxv Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.159 2014/11/09 17:48:07 maxv Exp $"); 34 35 #if defined(_KERNEL_OPT) 36 #include "opt_vm86.h" 37 #include "opt_user_ldt.h" 38 #endif 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/signalvar.h> 43 #include <sys/kernel.h> 44 #include <sys/proc.h> 45 #include <sys/buf.h> 46 #include <sys/reboot.h> 47 #include <sys/conf.h> 48 #include <sys/exec.h> 49 #include <sys/file.h> 50 #include <sys/callout.h> 51 #include <sys/mbuf.h> 52 #include <sys/msgbuf.h> 53 #include <sys/mount.h> 54 #include <sys/vnode.h> 55 #include <sys/device.h> 56 #include <sys/syscallargs.h> 57 #include <sys/filedesc.h> 58 #include <sys/exec_elf.h> 59 #include <sys/disklabel.h> 60 #include <sys/ioctl.h> 61 #include <sys/wait.h> 62 #include <sys/kauth.h> 63 #include <sys/kmem.h> 64 65 #include <miscfs/specfs/specdev.h> 66 67 #include <compat/linux/common/linux_types.h> 68 #include <compat/linux/common/linux_signal.h> 69 #include <compat/linux/common/linux_util.h> 70 #include <compat/linux/common/linux_ioctl.h> 71 #include <compat/linux/common/linux_hdio.h> 72 #include <compat/linux/common/linux_exec.h> 73 #include <compat/linux/common/linux_machdep.h> 74 #include <compat/linux/common/linux_errno.h> 75 76 #include <compat/linux/linux_syscallargs.h> 77 78 #include <sys/cpu.h> 79 #include <machine/cpufunc.h> 80 #include <machine/psl.h> 81 #include <machine/reg.h> 82 #include <machine/segments.h> 83 #include <machine/specialreg.h> 84 #include <machine/sysarch.h> 85 #include <machine/vm86.h> 86 #include <machine/vmparam.h> 87 88 #include <x86/fpu.h> 89 90 /* 91 * To see whether wscons is configured (for virtual console ioctl calls). 92 */ 93 #if defined(_KERNEL_OPT) 94 #include "wsdisplay.h" 95 #endif 96 #if (NWSDISPLAY > 0) 97 #include <dev/wscons/wsconsio.h> 98 #include <dev/wscons/wsdisplay_usl_io.h> 99 #if defined(_KERNEL_OPT) 100 #include "opt_xserver.h" 101 #endif 102 #endif 103 104 #ifdef DEBUG_LINUX 105 #define DPRINTF(a) uprintf a 106 #else 107 #define DPRINTF(a) 108 #endif 109 110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *); 111 extern struct disklist *x86_alldisks; 112 static void linux_save_ucontext(struct lwp *, struct trapframe *, 113 const sigset_t *, struct sigaltstack *, struct linux_ucontext *); 114 static void linux_save_sigcontext(struct lwp *, struct trapframe *, 115 const sigset_t *, struct linux_sigcontext *); 116 static int linux_restore_sigcontext(struct lwp *, 117 struct linux_sigcontext *, register_t *); 118 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *); 119 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *); 120 121 extern char linux_sigcode[], linux_rt_sigcode[]; 122 123 /* 124 * Deal with some i386-specific things in the Linux emulation code. 125 */ 126 127 void 128 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack) 129 { 130 struct trapframe *tf; 131 132 #ifdef USER_LDT 133 pmap_ldt_cleanup(l); 134 #endif 135 136 fpu_save_area_clear(l, __Linux_NPXCW__); 137 138 tf = l->l_md.md_regs; 139 tf->tf_gs = 0; 140 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 141 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 142 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 143 tf->tf_edi = 0; 144 tf->tf_esi = 0; 145 tf->tf_ebp = 0; 146 tf->tf_ebx = l->l_proc->p_psstrp; 147 tf->tf_edx = 0; 148 tf->tf_ecx = 0; 149 tf->tf_eax = 0; 150 tf->tf_eip = epp->ep_entry; 151 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 152 tf->tf_eflags = PSL_USERSET; 153 tf->tf_esp = stack; 154 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 155 } 156 157 /* 158 * Send an interrupt to process. 159 * 160 * Stack is set up to allow sigcode stored 161 * in u. to call routine, followed by kcall 162 * to sigreturn routine below. After sigreturn 163 * resets the signal mask, the stack, and the 164 * frame pointer, it returns to the user 165 * specified pc, psl. 166 */ 167 168 void 169 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 170 { 171 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO) 172 linux_rt_sendsig(ksi, mask); 173 else 174 linux_old_sendsig(ksi, mask); 175 } 176 177 178 static void 179 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc) 180 { 181 uc->uc_flags = 0; 182 uc->uc_link = NULL; 183 native_to_linux_sigaltstack(&uc->uc_stack, sas); 184 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext); 185 native_to_linux_sigset(&uc->uc_sigmask, mask); 186 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem)); 187 } 188 189 static void 190 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, 191 const sigset_t *mask, struct linux_sigcontext *sc) 192 { 193 struct pcb *pcb = lwp_getpcb(l); 194 195 /* Save register context. */ 196 #ifdef VM86 197 if (tf->tf_eflags & PSL_VM) { 198 sc->sc_gs = tf->tf_vm86_gs; 199 sc->sc_fs = tf->tf_vm86_fs; 200 sc->sc_es = tf->tf_vm86_es; 201 sc->sc_ds = tf->tf_vm86_ds; 202 sc->sc_eflags = get_vflags(l); 203 } else 204 #endif 205 { 206 sc->sc_gs = tf->tf_gs; 207 sc->sc_fs = tf->tf_fs; 208 sc->sc_es = tf->tf_es; 209 sc->sc_ds = tf->tf_ds; 210 sc->sc_eflags = tf->tf_eflags; 211 } 212 sc->sc_edi = tf->tf_edi; 213 sc->sc_esi = tf->tf_esi; 214 sc->sc_esp = tf->tf_esp; 215 sc->sc_ebp = tf->tf_ebp; 216 sc->sc_ebx = tf->tf_ebx; 217 sc->sc_edx = tf->tf_edx; 218 sc->sc_ecx = tf->tf_ecx; 219 sc->sc_eax = tf->tf_eax; 220 sc->sc_eip = tf->tf_eip; 221 sc->sc_cs = tf->tf_cs; 222 sc->sc_esp_at_signal = tf->tf_esp; 223 sc->sc_ss = tf->tf_ss; 224 sc->sc_err = tf->tf_err; 225 sc->sc_trapno = tf->tf_trapno; 226 sc->sc_cr2 = pcb->pcb_cr2; 227 sc->sc_387 = NULL; 228 229 /* Save signal stack. */ 230 /* Linux doesn't save the onstack flag in sigframe */ 231 232 /* Save signal mask. */ 233 native_to_linux_old_sigset(&sc->sc_mask, mask); 234 } 235 236 static void 237 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 238 { 239 struct lwp *l = curlwp; 240 struct proc *p = l->l_proc; 241 struct trapframe *tf; 242 struct linux_rt_sigframe *fp, frame; 243 int onstack, error; 244 int sig = ksi->ksi_signo; 245 sig_t catcher = SIGACTION(p, sig).sa_handler; 246 struct sigaltstack *sas = &l->l_sigstk; 247 248 tf = l->l_md.md_regs; 249 /* Do we need to jump onto the signal stack? */ 250 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 251 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 252 253 254 /* Allocate space for the signal handler context. */ 255 if (onstack) 256 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp + 257 sas->ss_size); 258 else 259 fp = (struct linux_rt_sigframe *)tf->tf_esp; 260 fp--; 261 262 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 263 onstack, fp, sig, tf->tf_eip, 264 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 265 266 /* Build stack frame for signal trampoline. */ 267 frame.sf_handler = catcher; 268 frame.sf_sig = native_to_linux_signo[sig]; 269 frame.sf_sip = &fp->sf_si; 270 frame.sf_ucp = &fp->sf_uc; 271 272 /* 273 * XXX: the following code assumes that the constants for 274 * siginfo are the same between linux and NetBSD. 275 */ 276 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info); 277 278 /* Save register context. */ 279 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc); 280 sendsig_reset(l, sig); 281 282 mutex_exit(p->p_lock); 283 error = copyout(&frame, fp, sizeof(frame)); 284 mutex_enter(p->p_lock); 285 286 if (error != 0) { 287 /* 288 * Process has trashed its stack; give it an illegal 289 * instruction to halt it in its tracks. 290 */ 291 sigexit(l, SIGILL); 292 /* NOTREACHED */ 293 } 294 295 /* 296 * Build context to run handler in. 297 */ 298 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 299 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 300 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 301 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) + 302 (linux_rt_sigcode - linux_sigcode); 303 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 304 tf->tf_eflags &= ~PSL_CLEARSIG; 305 tf->tf_esp = (int)fp; 306 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 307 308 /* Remember that we're now on the signal stack. */ 309 if (onstack) 310 sas->ss_flags |= SS_ONSTACK; 311 } 312 313 static void 314 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 315 { 316 struct lwp *l = curlwp; 317 struct proc *p = l->l_proc; 318 struct trapframe *tf; 319 struct linux_sigframe *fp, frame; 320 int onstack, error; 321 int sig = ksi->ksi_signo; 322 sig_t catcher = SIGACTION(p, sig).sa_handler; 323 struct sigaltstack *sas = &l->l_sigstk; 324 325 tf = l->l_md.md_regs; 326 327 /* Do we need to jump onto the signal stack? */ 328 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 329 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 330 331 /* Allocate space for the signal handler context. */ 332 if (onstack) 333 fp = (struct linux_sigframe *) ((char *)sas->ss_sp + 334 sas->ss_size); 335 else 336 fp = (struct linux_sigframe *)tf->tf_esp; 337 fp--; 338 339 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 340 onstack, fp, sig, tf->tf_eip, 341 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 342 343 /* Build stack frame for signal trampoline. */ 344 frame.sf_handler = catcher; 345 frame.sf_sig = native_to_linux_signo[sig]; 346 347 linux_save_sigcontext(l, tf, mask, &frame.sf_sc); 348 sendsig_reset(l, sig); 349 350 mutex_exit(p->p_lock); 351 error = copyout(&frame, fp, sizeof(frame)); 352 mutex_enter(p->p_lock); 353 354 if (error != 0) { 355 /* 356 * Process has trashed its stack; give it an illegal 357 * instruction to halt it in its tracks. 358 */ 359 sigexit(l, SIGILL); 360 /* NOTREACHED */ 361 } 362 363 /* 364 * Build context to run handler in. 365 */ 366 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 367 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 368 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 369 tf->tf_eip = (int)p->p_sigctx.ps_sigcode; 370 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 371 tf->tf_eflags &= ~PSL_CLEARSIG; 372 tf->tf_esp = (int)fp; 373 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 374 375 /* Remember that we're now on the signal stack. */ 376 if (onstack) 377 sas->ss_flags |= SS_ONSTACK; 378 } 379 380 /* 381 * System call to cleanup state after a signal 382 * has been taken. Reset signal mask and 383 * stack state from context left by sendsig (above). 384 * Return to previous pc and psl as specified by 385 * context left by sendsig. Check carefully to 386 * make sure that the user has not modified the 387 * psl to gain improper privileges or to cause 388 * a machine fault. 389 */ 390 int 391 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval) 392 { 393 /* { 394 syscallarg(struct linux_ucontext *) ucp; 395 } */ 396 struct linux_ucontext context, *ucp = SCARG(uap, ucp); 397 int error; 398 399 /* 400 * The trampoline code hands us the context. 401 * It is unsafe to keep track of it ourselves, in the event that a 402 * program jumps out of a signal handler. 403 */ 404 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0) 405 return error; 406 407 /* XXX XAX we can do better here by using more of the ucontext */ 408 return linux_restore_sigcontext(l, &context.uc_mcontext, retval); 409 } 410 411 int 412 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval) 413 { 414 /* { 415 syscallarg(struct linux_sigcontext *) scp; 416 } */ 417 struct linux_sigcontext context, *scp = SCARG(uap, scp); 418 int error; 419 420 /* 421 * The trampoline code hands us the context. 422 * It is unsafe to keep track of it ourselves, in the event that a 423 * program jumps out of a signal handler. 424 */ 425 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0) 426 return error; 427 return linux_restore_sigcontext(l, &context, retval); 428 } 429 430 static int 431 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp, 432 register_t *retval) 433 { 434 struct proc *p = l->l_proc; 435 struct sigaltstack *sas = &l->l_sigstk; 436 struct trapframe *tf; 437 sigset_t mask; 438 ssize_t ss_gap; 439 440 /* Restore register context. */ 441 tf = l->l_md.md_regs; 442 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 443 444 #ifdef VM86 445 if (scp->sc_eflags & PSL_VM) { 446 void syscall_vm86(struct trapframe *); 447 448 tf->tf_vm86_gs = scp->sc_gs; 449 tf->tf_vm86_fs = scp->sc_fs; 450 tf->tf_vm86_es = scp->sc_es; 451 tf->tf_vm86_ds = scp->sc_ds; 452 set_vflags(l, scp->sc_eflags); 453 p->p_md.md_syscall = syscall_vm86; 454 } else 455 #endif 456 { 457 /* 458 * Check for security violations. If we're returning to 459 * protected mode, the CPU will validate the segment registers 460 * automatically and generate a trap on violations. We handle 461 * the trap, rather than doing all of the checking here. 462 */ 463 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 || 464 !USERMODE(scp->sc_cs, scp->sc_eflags)) 465 return EINVAL; 466 467 tf->tf_gs = scp->sc_gs; 468 tf->tf_fs = scp->sc_fs; 469 tf->tf_es = scp->sc_es; 470 tf->tf_ds = scp->sc_ds; 471 #ifdef VM86 472 if (tf->tf_eflags & PSL_VM) 473 (*p->p_emul->e_syscall_intern)(p); 474 #endif 475 tf->tf_eflags = scp->sc_eflags; 476 } 477 tf->tf_edi = scp->sc_edi; 478 tf->tf_esi = scp->sc_esi; 479 tf->tf_ebp = scp->sc_ebp; 480 tf->tf_ebx = scp->sc_ebx; 481 tf->tf_edx = scp->sc_edx; 482 tf->tf_ecx = scp->sc_ecx; 483 tf->tf_eax = scp->sc_eax; 484 tf->tf_eip = scp->sc_eip; 485 tf->tf_cs = scp->sc_cs; 486 tf->tf_esp = scp->sc_esp_at_signal; 487 tf->tf_ss = scp->sc_ss; 488 489 /* Restore signal stack. */ 490 /* 491 * Linux really does it this way; it doesn't have space in sigframe 492 * to save the onstack flag. 493 */ 494 mutex_enter(p->p_lock); 495 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp); 496 if (ss_gap >= 0 && ss_gap < sas->ss_size) 497 sas->ss_flags |= SS_ONSTACK; 498 else 499 sas->ss_flags &= ~SS_ONSTACK; 500 501 /* Restore signal mask. */ 502 linux_old_to_native_sigset(&mask, &scp->sc_mask); 503 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 504 mutex_exit(p->p_lock); 505 506 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 507 return EJUSTRETURN; 508 } 509 510 #ifdef USER_LDT 511 512 static int 513 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 514 register_t *retval) 515 { 516 struct x86_get_ldt_args gl; 517 int error; 518 union descriptor *ldt_buf; 519 size_t sz; 520 521 /* 522 * I've checked the linux code - this function is asymetric with 523 * linux_write_ldt, and returns raw ldt entries. 524 * NB, the code I saw zerod the spare parts of the user buffer. 525 */ 526 527 DPRINTF(("linux_read_ldt!")); 528 529 sz = 8192 * sizeof(*ldt_buf); 530 ldt_buf = kmem_zalloc(sz, KM_SLEEP); 531 gl.start = 0; 532 gl.desc = NULL; 533 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor); 534 error = x86_get_ldt1(l, &gl, ldt_buf); 535 /* NB gl.num might have changed */ 536 if (error == 0) { 537 *retval = gl.num * sizeof *ldt; 538 error = copyout(ldt_buf, SCARG(uap, ptr), 539 gl.num * sizeof *ldt_buf); 540 } 541 kmem_free(ldt_buf, sz); 542 543 return error; 544 } 545 546 struct linux_ldt_info { 547 u_int entry_number; 548 u_long base_addr; 549 u_int limit; 550 u_int seg_32bit:1; 551 u_int contents:2; 552 u_int read_exec_only:1; 553 u_int limit_in_pages:1; 554 u_int seg_not_present:1; 555 u_int useable:1; 556 }; 557 558 static int 559 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 560 int oldmode) 561 { 562 struct linux_ldt_info ldt_info; 563 union descriptor d; 564 struct x86_set_ldt_args sl; 565 int error; 566 567 DPRINTF(("linux_write_ldt %d\n", oldmode)); 568 if (SCARG(uap, bytecount) != sizeof(ldt_info)) 569 return (EINVAL); 570 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0) 571 return error; 572 if (ldt_info.entry_number >= 8192) 573 return (EINVAL); 574 if (ldt_info.contents == 3) { 575 if (oldmode) 576 return (EINVAL); 577 if (ldt_info.seg_not_present) 578 return (EINVAL); 579 } 580 581 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && 582 (oldmode || (ldt_info.contents == 0 && 583 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 && 584 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 && 585 ldt_info.useable == 0))) { 586 /* this means you should zero the ldt */ 587 (void)memset(&d, 0, sizeof(d)); 588 } else { 589 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff; 590 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff; 591 d.sd.sd_lolimit = ldt_info.limit & 0xffff; 592 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf; 593 d.sd.sd_type = 16 | (ldt_info.contents << 2) | 594 (!ldt_info.read_exec_only << 1); 595 d.sd.sd_dpl = SEL_UPL; 596 d.sd.sd_p = !ldt_info.seg_not_present; 597 d.sd.sd_def32 = ldt_info.seg_32bit; 598 d.sd.sd_gran = ldt_info.limit_in_pages; 599 if (!oldmode) 600 d.sd.sd_xx = ldt_info.useable; 601 else 602 d.sd.sd_xx = 0; 603 } 604 sl.start = ldt_info.entry_number; 605 sl.desc = NULL; 606 sl.num = 1; 607 608 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n", 609 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit)); 610 611 return x86_set_ldt1(l, &sl, &d); 612 } 613 614 #endif /* USER_LDT */ 615 616 int 617 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval) 618 { 619 /* { 620 syscallarg(int) func; 621 syscallarg(void *) ptr; 622 syscallarg(size_t) bytecount; 623 } */ 624 625 switch (SCARG(uap, func)) { 626 #ifdef USER_LDT 627 case 0: 628 return linux_read_ldt(l, (const void *)uap, retval); 629 case 1: 630 return linux_write_ldt(l, (const void *)uap, 1); 631 case 2: 632 #ifdef notyet 633 return linux_read_default_ldt(l, (const void *)uap, retval); 634 #else 635 return (ENOSYS); 636 #endif 637 case 0x11: 638 return linux_write_ldt(l, (const void *)uap, 0); 639 #endif /* USER_LDT */ 640 641 default: 642 return (ENOSYS); 643 } 644 } 645 646 /* 647 * XXX Pathetic hack to make svgalib work. This will fake the major 648 * device number of an opened VT so that svgalib likes it. grmbl. 649 * Should probably do it 'wrong the right way' and use a mapping 650 * array for all major device numbers, and map linux_mknod too. 651 */ 652 dev_t 653 linux_fakedev(dev_t dev, int raw) 654 { 655 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 656 const struct cdevsw *cd = cdevsw_lookup(dev); 657 658 if (raw) { 659 #if (NWSDISPLAY > 0) 660 extern const struct cdevsw wsdisplay_cdevsw; 661 if (cd == &wsdisplay_cdevsw) 662 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 663 #endif 664 } 665 666 if (cd == &ptc_cdevsw) 667 return makedev(LINUX_PTC_MAJOR, minor(dev)); 668 if (cd == &pts_cdevsw) 669 return makedev(LINUX_PTS_MAJOR, minor(dev)); 670 671 return dev; 672 } 673 674 #if (NWSDISPLAY > 0) 675 /* 676 * That's not complete, but enough to get an X server running. 677 */ 678 #define NR_KEYS 128 679 static const u_short plain_map[NR_KEYS] = { 680 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 681 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009, 682 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 683 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73, 684 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b, 685 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76, 686 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c, 687 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 688 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307, 689 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 690 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a, 691 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 692 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 693 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 694 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 695 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 696 }, shift_map[NR_KEYS] = { 697 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e, 698 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009, 699 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49, 700 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53, 701 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a, 702 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56, 703 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c, 704 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e, 705 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307, 706 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 707 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a, 708 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 709 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 710 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116, 711 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 712 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 713 }, altgr_map[NR_KEYS] = { 714 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200, 715 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200, 716 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 717 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73, 718 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200, 719 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76, 720 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c, 721 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510, 722 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911, 723 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b, 724 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516, 725 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 726 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 727 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 728 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 729 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 730 }, ctrl_map[NR_KEYS] = { 731 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e, 732 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200, 733 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009, 734 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013, 735 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200, 736 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016, 737 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c, 738 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 739 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307, 740 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 741 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a, 742 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 743 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 744 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 745 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 746 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 747 }; 748 749 const u_short * const linux_keytabs[] = { 750 plain_map, shift_map, altgr_map, altgr_map, ctrl_map 751 }; 752 #endif 753 754 static struct biosdisk_info * 755 fd2biosinfo(struct proc *p, struct file *fp) 756 { 757 struct vnode *vp; 758 const char *blkname; 759 char diskname[16]; 760 int i; 761 struct nativedisk_info *nip; 762 struct disklist *dl = x86_alldisks; 763 764 if (fp->f_type != DTYPE_VNODE) 765 return NULL; 766 vp = (struct vnode *)fp->f_data; 767 768 if (vp->v_type != VBLK) 769 return NULL; 770 771 blkname = devsw_blk2name(major(vp->v_rdev)); 772 snprintf(diskname, sizeof diskname, "%s%llu", blkname, 773 (unsigned long long)DISKUNIT(vp->v_rdev)); 774 775 for (i = 0; i < dl->dl_nnativedisks; i++) { 776 nip = &dl->dl_nativedisks[i]; 777 if (strcmp(diskname, nip->ni_devname)) 778 continue; 779 if (nip->ni_nmatches != 0) 780 return &dl->dl_biosdisks[nip->ni_biosmatches[0]]; 781 } 782 783 return NULL; 784 } 785 786 787 /* 788 * We come here in a last attempt to satisfy a Linux ioctl() call 789 */ 790 int 791 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval) 792 { 793 /* { 794 syscallarg(int) fd; 795 syscallarg(u_long) com; 796 syscallarg(void *) data; 797 } */ 798 struct sys_ioctl_args bia; 799 u_long com; 800 int error, error1; 801 #if (NWSDISPLAY > 0) 802 struct vt_mode lvt; 803 struct kbentry kbe; 804 #endif 805 struct linux_hd_geometry hdg; 806 struct linux_hd_big_geometry hdg_big; 807 struct biosdisk_info *bip; 808 file_t *fp; 809 int fd; 810 struct disklabel label, *labp; 811 struct partinfo partp; 812 int (*ioctlf)(struct file *, u_long, void *); 813 u_long start, biostotal, realtotal; 814 u_char heads, sectors; 815 u_int cylinders; 816 struct ioctl_pt pt; 817 818 fd = SCARG(uap, fd); 819 SCARG(&bia, fd) = fd; 820 SCARG(&bia, data) = SCARG(uap, data); 821 com = SCARG(uap, com); 822 823 if ((fp = fd_getfile(fd)) == NULL) 824 return (EBADF); 825 826 switch (com) { 827 #if (NWSDISPLAY > 0) 828 case LINUX_KDGKBMODE: 829 com = KDGKBMODE; 830 break; 831 case LINUX_KDSKBMODE: 832 com = KDSKBMODE; 833 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW) 834 SCARG(&bia, data) = (void *)K_RAW; 835 break; 836 case LINUX_KIOCSOUND: 837 SCARG(&bia, data) = 838 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff); 839 /* fall through */ 840 case LINUX_KDMKTONE: 841 com = KDMKTONE; 842 break; 843 case LINUX_KDSETMODE: 844 com = KDSETMODE; 845 break; 846 case LINUX_KDGETMODE: 847 /* KD_* values are equal to the wscons numbers */ 848 com = WSDISPLAYIO_GMODE; 849 break; 850 case LINUX_KDENABIO: 851 com = KDENABIO; 852 break; 853 case LINUX_KDDISABIO: 854 com = KDDISABIO; 855 break; 856 case LINUX_KDGETLED: 857 com = KDGETLED; 858 break; 859 case LINUX_KDSETLED: 860 com = KDSETLED; 861 break; 862 case LINUX_VT_OPENQRY: 863 com = VT_OPENQRY; 864 break; 865 case LINUX_VT_GETMODE: 866 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt); 867 if (error != 0) 868 goto out; 869 lvt.relsig = native_to_linux_signo[lvt.relsig]; 870 lvt.acqsig = native_to_linux_signo[lvt.acqsig]; 871 lvt.frsig = native_to_linux_signo[lvt.frsig]; 872 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt)); 873 goto out; 874 case LINUX_VT_SETMODE: 875 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt)); 876 if (error != 0) 877 goto out; 878 lvt.relsig = linux_to_native_signo[lvt.relsig]; 879 lvt.acqsig = linux_to_native_signo[lvt.acqsig]; 880 lvt.frsig = linux_to_native_signo[lvt.frsig]; 881 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt); 882 goto out; 883 case LINUX_VT_DISALLOCATE: 884 /* XXX should use WSDISPLAYIO_DELSCREEN */ 885 error = 0; 886 goto out; 887 case LINUX_VT_RELDISP: 888 com = VT_RELDISP; 889 break; 890 case LINUX_VT_ACTIVATE: 891 com = VT_ACTIVATE; 892 break; 893 case LINUX_VT_WAITACTIVE: 894 com = VT_WAITACTIVE; 895 break; 896 case LINUX_VT_GETSTATE: 897 com = VT_GETSTATE; 898 break; 899 case LINUX_KDGKBTYPE: 900 { 901 static const u_int8_t kb101 = KB_101; 902 903 /* This is what Linux does. */ 904 error = copyout(&kb101, SCARG(uap, data), 1); 905 goto out; 906 } 907 case LINUX_KDGKBENT: 908 /* 909 * The Linux KDGKBENT ioctl is different from the 910 * SYSV original. So we handle it in machdep code. 911 * XXX We should use keyboard mapping information 912 * from wsdisplay, but this would be expensive. 913 */ 914 if ((error = copyin(SCARG(uap, data), &kbe, 915 sizeof(struct kbentry)))) 916 goto out; 917 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *) 918 || kbe.kb_index >= NR_KEYS) { 919 error = EINVAL; 920 goto out; 921 } 922 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index]; 923 error = copyout(&kbe, SCARG(uap, data), 924 sizeof(struct kbentry)); 925 goto out; 926 #endif 927 case LINUX_HDIO_GETGEO: 928 case LINUX_HDIO_GETGEO_BIG: 929 /* 930 * Try to mimic Linux behaviour: return the BIOS geometry 931 * if possible (extending its # of cylinders if it's beyond 932 * the 1023 limit), fall back to the MI geometry (i.e. 933 * the real geometry) if not found, by returning an 934 * error. See common/linux_hdio.c 935 */ 936 bip = fd2biosinfo(curproc, fp); 937 ioctlf = fp->f_ops->fo_ioctl; 938 error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label); 939 error1 = ioctlf(fp, DIOCGPART, (void *)&partp); 940 if (error != 0 && error1 != 0) { 941 error = error1; 942 goto out; 943 } 944 labp = error != 0 ? &label : partp.disklab; 945 start = error1 != 0 ? partp.part->p_offset : 0; 946 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0 947 && bip->bi_cyl != 0) { 948 heads = bip->bi_head; 949 sectors = bip->bi_sec; 950 cylinders = bip->bi_cyl; 951 biostotal = heads * sectors * cylinders; 952 realtotal = labp->d_ntracks * labp->d_nsectors * 953 labp->d_ncylinders; 954 if (realtotal > biostotal) 955 cylinders = realtotal / (heads * sectors); 956 } else { 957 heads = labp->d_ntracks; 958 cylinders = labp->d_ncylinders; 959 sectors = labp->d_nsectors; 960 } 961 if (com == LINUX_HDIO_GETGEO) { 962 hdg.start = start; 963 hdg.heads = heads; 964 hdg.cylinders = cylinders; 965 hdg.sectors = sectors; 966 error = copyout(&hdg, SCARG(uap, data), sizeof hdg); 967 goto out; 968 } else { 969 hdg_big.start = start; 970 hdg_big.heads = heads; 971 hdg_big.cylinders = cylinders; 972 hdg_big.sectors = sectors; 973 error = copyout(&hdg_big, SCARG(uap, data), 974 sizeof hdg_big); 975 goto out; 976 } 977 978 default: 979 /* 980 * Unknown to us. If it's on a device, just pass it through 981 * using PTIOCLINUX, the device itself might be able to 982 * make some sense of it. 983 * XXX hack: if the function returns EJUSTRETURN, 984 * it has stuffed a sysctl return value in pt.data. 985 */ 986 ioctlf = fp->f_ops->fo_ioctl; 987 pt.com = SCARG(uap, com); 988 pt.data = SCARG(uap, data); 989 error = ioctlf(fp, PTIOCLINUX, &pt); 990 if (error == EJUSTRETURN) { 991 retval[0] = (register_t)pt.data; 992 error = 0; 993 } 994 995 if (error == ENOTTY) { 996 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n", 997 com)); 998 } 999 goto out; 1000 } 1001 SCARG(&bia, com) = com; 1002 error = sys_ioctl(curlwp, &bia, retval); 1003 out: 1004 fd_putfile(fd); 1005 return error; 1006 } 1007 1008 /* 1009 * Set I/O permissions for a process. Just set the maximum level 1010 * right away (ignoring the argument), otherwise we would have 1011 * to rely on I/O permission maps, which are not implemented. 1012 */ 1013 int 1014 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval) 1015 { 1016 /* { 1017 syscallarg(int) level; 1018 } */ 1019 struct trapframe *fp = l->l_md.md_regs; 1020 1021 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL, 1022 NULL, NULL, NULL, NULL) != 0) 1023 return EPERM; 1024 fp->tf_eflags |= PSL_IOPL; 1025 *retval = 0; 1026 return 0; 1027 } 1028 1029 /* 1030 * See above. If a root process tries to set access to an I/O port, 1031 * just let it have the whole range. 1032 */ 1033 int 1034 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval) 1035 { 1036 /* { 1037 syscallarg(unsigned int) lo; 1038 syscallarg(unsigned int) hi; 1039 syscallarg(int) val; 1040 } */ 1041 struct trapframe *fp = l->l_md.md_regs; 1042 1043 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ? 1044 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL, 1045 NULL, NULL) != 0) 1046 return EPERM; 1047 if (SCARG(uap, val)) 1048 fp->tf_eflags |= PSL_IOPL; 1049 *retval = 0; 1050 return 0; 1051 } 1052 1053 int 1054 linux_usertrap(struct lwp *l, vaddr_t trapaddr, 1055 void *arg) 1056 { 1057 return 0; 1058 } 1059 1060 const char * 1061 linux_get_uname_arch(void) 1062 { 1063 static char uname_arch[5] = "i386"; 1064 1065 if (uname_arch[1] == '3') 1066 uname_arch[1] += cpu_class; 1067 return uname_arch; 1068 } 1069