1 /* $NetBSD: linux_machdep.c,v 1.165 2017/09/17 09:41:35 maxv Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.165 2017/09/17 09:41:35 maxv Exp $"); 34 35 #if defined(_KERNEL_OPT) 36 #include "opt_user_ldt.h" 37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/signalvar.h> 42 #include <sys/kernel.h> 43 #include <sys/proc.h> 44 #include <sys/buf.h> 45 #include <sys/reboot.h> 46 #include <sys/conf.h> 47 #include <sys/exec.h> 48 #include <sys/file.h> 49 #include <sys/callout.h> 50 #include <sys/mbuf.h> 51 #include <sys/msgbuf.h> 52 #include <sys/mount.h> 53 #include <sys/vnode.h> 54 #include <sys/device.h> 55 #include <sys/syscallargs.h> 56 #include <sys/filedesc.h> 57 #include <sys/exec_elf.h> 58 #include <sys/disklabel.h> 59 #include <sys/ioctl.h> 60 #include <sys/wait.h> 61 #include <sys/kauth.h> 62 #include <sys/kmem.h> 63 64 #include <miscfs/specfs/specdev.h> 65 66 #include <compat/linux/common/linux_types.h> 67 #include <compat/linux/common/linux_signal.h> 68 #include <compat/linux/common/linux_util.h> 69 #include <compat/linux/common/linux_ioctl.h> 70 #include <compat/linux/common/linux_hdio.h> 71 #include <compat/linux/common/linux_exec.h> 72 #include <compat/linux/common/linux_machdep.h> 73 #include <compat/linux/common/linux_errno.h> 74 75 #include <compat/linux/linux_syscallargs.h> 76 77 #include <sys/cpu.h> 78 #include <machine/cpufunc.h> 79 #include <machine/psl.h> 80 #include <machine/reg.h> 81 #include <machine/segments.h> 82 #include <machine/specialreg.h> 83 #include <machine/sysarch.h> 84 #include <machine/vmparam.h> 85 86 #include <x86/fpu.h> 87 88 /* 89 * To see whether wscons is configured (for virtual console ioctl calls). 90 */ 91 #if defined(_KERNEL_OPT) 92 #include "wsdisplay.h" 93 #endif 94 #if (NWSDISPLAY > 0) 95 #include <dev/wscons/wsconsio.h> 96 #include <dev/wscons/wsdisplay_usl_io.h> 97 #if defined(_KERNEL_OPT) 98 #include "opt_xserver.h" 99 #endif 100 #endif 101 102 #ifdef DEBUG_LINUX 103 #define DPRINTF(a) uprintf a 104 #else 105 #define DPRINTF(a) 106 #endif 107 108 extern struct disklist *x86_alldisks; 109 110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *); 111 static void linux_save_ucontext(struct lwp *, struct trapframe *, 112 const sigset_t *, struct sigaltstack *, struct linux_ucontext *); 113 static void linux_save_sigcontext(struct lwp *, struct trapframe *, 114 const sigset_t *, struct linux_sigcontext *); 115 static int linux_restore_sigcontext(struct lwp *, 116 struct linux_sigcontext *, register_t *); 117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *); 118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *); 119 120 extern char linux_sigcode[], linux_rt_sigcode[]; 121 122 /* 123 * Deal with some i386-specific things in the Linux emulation code. 124 */ 125 126 void 127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack) 128 { 129 struct trapframe *tf; 130 131 #ifdef USER_LDT 132 pmap_ldt_cleanup(l); 133 #endif 134 135 fpu_save_area_clear(l, __Linux_NPXCW__); 136 137 tf = l->l_md.md_regs; 138 tf->tf_gs = 0; 139 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 140 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 141 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 142 tf->tf_edi = 0; 143 tf->tf_esi = 0; 144 tf->tf_ebp = 0; 145 tf->tf_ebx = l->l_proc->p_psstrp; 146 tf->tf_edx = 0; 147 tf->tf_ecx = 0; 148 tf->tf_eax = 0; 149 tf->tf_eip = epp->ep_entry; 150 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 151 tf->tf_eflags = PSL_USERSET; 152 tf->tf_esp = stack; 153 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 154 } 155 156 /* 157 * Send an interrupt to process. 158 * 159 * Stack is set up to allow sigcode stored 160 * in u. to call routine, followed by kcall 161 * to sigreturn routine below. After sigreturn 162 * resets the signal mask, the stack, and the 163 * frame pointer, it returns to the user 164 * specified pc, psl. 165 */ 166 167 void 168 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 169 { 170 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO) 171 linux_rt_sendsig(ksi, mask); 172 else 173 linux_old_sendsig(ksi, mask); 174 } 175 176 177 static void 178 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc) 179 { 180 uc->uc_flags = 0; 181 uc->uc_link = NULL; 182 native_to_linux_sigaltstack(&uc->uc_stack, sas); 183 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext); 184 native_to_linux_sigset(&uc->uc_sigmask, mask); 185 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem)); 186 } 187 188 static void 189 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, 190 const sigset_t *mask, struct linux_sigcontext *sc) 191 { 192 struct pcb *pcb = lwp_getpcb(l); 193 194 /* Save register context. */ 195 sc->sc_gs = tf->tf_gs; 196 sc->sc_fs = tf->tf_fs; 197 sc->sc_es = tf->tf_es; 198 sc->sc_ds = tf->tf_ds; 199 sc->sc_eflags = tf->tf_eflags; 200 201 sc->sc_edi = tf->tf_edi; 202 sc->sc_esi = tf->tf_esi; 203 sc->sc_esp = tf->tf_esp; 204 sc->sc_ebp = tf->tf_ebp; 205 sc->sc_ebx = tf->tf_ebx; 206 sc->sc_edx = tf->tf_edx; 207 sc->sc_ecx = tf->tf_ecx; 208 sc->sc_eax = tf->tf_eax; 209 sc->sc_eip = tf->tf_eip; 210 sc->sc_cs = tf->tf_cs; 211 sc->sc_esp_at_signal = tf->tf_esp; 212 sc->sc_ss = tf->tf_ss; 213 sc->sc_err = tf->tf_err; 214 sc->sc_trapno = tf->tf_trapno; 215 sc->sc_cr2 = pcb->pcb_cr2; 216 sc->sc_387 = NULL; 217 218 /* Save signal stack. */ 219 /* Linux doesn't save the onstack flag in sigframe */ 220 221 /* Save signal mask. */ 222 native_to_linux_old_sigset(&sc->sc_mask, mask); 223 } 224 225 static void 226 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 227 { 228 struct lwp *l = curlwp; 229 struct proc *p = l->l_proc; 230 struct trapframe *tf; 231 struct linux_rt_sigframe *fp, frame; 232 int onstack, error; 233 int sig = ksi->ksi_signo; 234 sig_t catcher = SIGACTION(p, sig).sa_handler; 235 struct sigaltstack *sas = &l->l_sigstk; 236 237 tf = l->l_md.md_regs; 238 /* Do we need to jump onto the signal stack? */ 239 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 240 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 241 242 243 /* Allocate space for the signal handler context. */ 244 if (onstack) 245 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp + 246 sas->ss_size); 247 else 248 fp = (struct linux_rt_sigframe *)tf->tf_esp; 249 fp--; 250 251 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 252 onstack, fp, sig, tf->tf_eip, 253 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 254 255 /* Build stack frame for signal trampoline. */ 256 frame.sf_handler = catcher; 257 frame.sf_sig = native_to_linux_signo[sig]; 258 frame.sf_sip = &fp->sf_si; 259 frame.sf_ucp = &fp->sf_uc; 260 261 /* 262 * XXX: the following code assumes that the constants for 263 * siginfo are the same between linux and NetBSD. 264 */ 265 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info); 266 267 /* Save register context. */ 268 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc); 269 sendsig_reset(l, sig); 270 271 mutex_exit(p->p_lock); 272 error = copyout(&frame, fp, sizeof(frame)); 273 mutex_enter(p->p_lock); 274 275 if (error != 0) { 276 /* 277 * Process has trashed its stack; give it an illegal 278 * instruction to halt it in its tracks. 279 */ 280 sigexit(l, SIGILL); 281 /* NOTREACHED */ 282 } 283 284 /* 285 * Build context to run handler in. 286 */ 287 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 288 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 289 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 290 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) + 291 (linux_rt_sigcode - linux_sigcode); 292 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 293 tf->tf_eflags &= ~PSL_CLEARSIG; 294 tf->tf_esp = (int)fp; 295 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 296 297 /* Remember that we're now on the signal stack. */ 298 if (onstack) 299 sas->ss_flags |= SS_ONSTACK; 300 } 301 302 static void 303 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 304 { 305 struct lwp *l = curlwp; 306 struct proc *p = l->l_proc; 307 struct trapframe *tf; 308 struct linux_sigframe *fp, frame; 309 int onstack, error; 310 int sig = ksi->ksi_signo; 311 sig_t catcher = SIGACTION(p, sig).sa_handler; 312 struct sigaltstack *sas = &l->l_sigstk; 313 314 tf = l->l_md.md_regs; 315 316 /* Do we need to jump onto the signal stack? */ 317 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 318 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 319 320 /* Allocate space for the signal handler context. */ 321 if (onstack) 322 fp = (struct linux_sigframe *) ((char *)sas->ss_sp + 323 sas->ss_size); 324 else 325 fp = (struct linux_sigframe *)tf->tf_esp; 326 fp--; 327 328 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 329 onstack, fp, sig, tf->tf_eip, 330 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 331 332 /* Build stack frame for signal trampoline. */ 333 frame.sf_handler = catcher; 334 frame.sf_sig = native_to_linux_signo[sig]; 335 336 linux_save_sigcontext(l, tf, mask, &frame.sf_sc); 337 sendsig_reset(l, sig); 338 339 mutex_exit(p->p_lock); 340 error = copyout(&frame, fp, sizeof(frame)); 341 mutex_enter(p->p_lock); 342 343 if (error != 0) { 344 /* 345 * Process has trashed its stack; give it an illegal 346 * instruction to halt it in its tracks. 347 */ 348 sigexit(l, SIGILL); 349 /* NOTREACHED */ 350 } 351 352 /* 353 * Build context to run handler in. 354 */ 355 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 356 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 357 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 358 tf->tf_eip = (int)p->p_sigctx.ps_sigcode; 359 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 360 tf->tf_eflags &= ~PSL_CLEARSIG; 361 tf->tf_esp = (int)fp; 362 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 363 364 /* Remember that we're now on the signal stack. */ 365 if (onstack) 366 sas->ss_flags |= SS_ONSTACK; 367 } 368 369 /* 370 * System call to cleanup state after a signal 371 * has been taken. Reset signal mask and 372 * stack state from context left by sendsig (above). 373 * Return to previous pc and psl as specified by 374 * context left by sendsig. Check carefully to 375 * make sure that the user has not modified the 376 * psl to gain improper privileges or to cause 377 * a machine fault. 378 */ 379 int 380 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval) 381 { 382 /* { 383 syscallarg(struct linux_ucontext *) ucp; 384 } */ 385 struct linux_ucontext context, *ucp = SCARG(uap, ucp); 386 int error; 387 388 /* 389 * The trampoline code hands us the context. 390 * It is unsafe to keep track of it ourselves, in the event that a 391 * program jumps out of a signal handler. 392 */ 393 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0) 394 return error; 395 396 /* XXX XAX we can do better here by using more of the ucontext */ 397 return linux_restore_sigcontext(l, &context.uc_mcontext, retval); 398 } 399 400 int 401 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval) 402 { 403 /* { 404 syscallarg(struct linux_sigcontext *) scp; 405 } */ 406 struct linux_sigcontext context, *scp = SCARG(uap, scp); 407 int error; 408 409 /* 410 * The trampoline code hands us the context. 411 * It is unsafe to keep track of it ourselves, in the event that a 412 * program jumps out of a signal handler. 413 */ 414 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0) 415 return error; 416 return linux_restore_sigcontext(l, &context, retval); 417 } 418 419 static int 420 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp, 421 register_t *retval) 422 { 423 struct proc *p = l->l_proc; 424 struct sigaltstack *sas = &l->l_sigstk; 425 struct trapframe *tf; 426 sigset_t mask; 427 ssize_t ss_gap; 428 429 /* Restore register context. */ 430 tf = l->l_md.md_regs; 431 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 432 433 /* 434 * Check for security violations. If we're returning to 435 * protected mode, the CPU will validate the segment registers 436 * automatically and generate a trap on violations. We handle 437 * the trap, rather than doing all of the checking here. 438 */ 439 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 || 440 !USERMODE(scp->sc_cs)) 441 return EINVAL; 442 443 tf->tf_gs = scp->sc_gs; 444 tf->tf_fs = scp->sc_fs; 445 tf->tf_es = scp->sc_es; 446 tf->tf_ds = scp->sc_ds; 447 tf->tf_eflags = scp->sc_eflags; 448 449 tf->tf_edi = scp->sc_edi; 450 tf->tf_esi = scp->sc_esi; 451 tf->tf_ebp = scp->sc_ebp; 452 tf->tf_ebx = scp->sc_ebx; 453 tf->tf_edx = scp->sc_edx; 454 tf->tf_ecx = scp->sc_ecx; 455 tf->tf_eax = scp->sc_eax; 456 tf->tf_eip = scp->sc_eip; 457 tf->tf_cs = scp->sc_cs; 458 tf->tf_esp = scp->sc_esp_at_signal; 459 tf->tf_ss = scp->sc_ss; 460 461 /* Restore signal stack. */ 462 /* 463 * Linux really does it this way; it doesn't have space in sigframe 464 * to save the onstack flag. 465 */ 466 mutex_enter(p->p_lock); 467 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp); 468 if (ss_gap >= 0 && ss_gap < sas->ss_size) 469 sas->ss_flags |= SS_ONSTACK; 470 else 471 sas->ss_flags &= ~SS_ONSTACK; 472 473 /* Restore signal mask. */ 474 linux_old_to_native_sigset(&mask, &scp->sc_mask); 475 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 476 mutex_exit(p->p_lock); 477 478 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 479 return EJUSTRETURN; 480 } 481 482 #ifdef USER_LDT 483 484 static int 485 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 486 register_t *retval) 487 { 488 struct x86_get_ldt_args gl; 489 int error; 490 union descriptor *ldt_buf; 491 size_t sz; 492 493 /* 494 * I've checked the linux code - this function is asymetric with 495 * linux_write_ldt, and returns raw ldt entries. 496 * NB, the code I saw zerod the spare parts of the user buffer. 497 */ 498 499 DPRINTF(("linux_read_ldt!")); 500 501 sz = 8192 * sizeof(*ldt_buf); 502 ldt_buf = kmem_zalloc(sz, KM_SLEEP); 503 gl.start = 0; 504 gl.desc = NULL; 505 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor); 506 error = x86_get_ldt1(l, &gl, ldt_buf); 507 /* NB gl.num might have changed */ 508 if (error == 0) { 509 *retval = gl.num * sizeof(*ldtstore); 510 error = copyout(ldt_buf, SCARG(uap, ptr), 511 gl.num * sizeof *ldt_buf); 512 } 513 kmem_free(ldt_buf, sz); 514 515 return error; 516 } 517 518 struct linux_ldt_info { 519 u_int entry_number; 520 u_long base_addr; 521 u_int limit; 522 u_int seg_32bit:1; 523 u_int contents:2; 524 u_int read_exec_only:1; 525 u_int limit_in_pages:1; 526 u_int seg_not_present:1; 527 u_int useable:1; 528 }; 529 530 static int 531 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 532 int oldmode) 533 { 534 struct linux_ldt_info ldt_info; 535 union descriptor d; 536 struct x86_set_ldt_args sl; 537 int error; 538 539 DPRINTF(("linux_write_ldt %d\n", oldmode)); 540 if (SCARG(uap, bytecount) != sizeof(ldt_info)) 541 return (EINVAL); 542 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0) 543 return error; 544 if (ldt_info.entry_number >= 8192) 545 return (EINVAL); 546 if (ldt_info.contents == 3) { 547 if (oldmode) 548 return (EINVAL); 549 if (ldt_info.seg_not_present) 550 return (EINVAL); 551 } 552 553 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && 554 (oldmode || (ldt_info.contents == 0 && 555 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 && 556 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 && 557 ldt_info.useable == 0))) { 558 /* this means you should zero the ldt */ 559 (void)memset(&d, 0, sizeof(d)); 560 } else { 561 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff; 562 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff; 563 d.sd.sd_lolimit = ldt_info.limit & 0xffff; 564 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf; 565 d.sd.sd_type = 16 | (ldt_info.contents << 2) | 566 (!ldt_info.read_exec_only << 1); 567 d.sd.sd_dpl = SEL_UPL; 568 d.sd.sd_p = !ldt_info.seg_not_present; 569 d.sd.sd_def32 = ldt_info.seg_32bit; 570 d.sd.sd_gran = ldt_info.limit_in_pages; 571 if (!oldmode) 572 d.sd.sd_xx = ldt_info.useable; 573 else 574 d.sd.sd_xx = 0; 575 } 576 sl.start = ldt_info.entry_number; 577 sl.desc = NULL; 578 sl.num = 1; 579 580 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n", 581 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit)); 582 583 return x86_set_ldt1(l, &sl, &d); 584 } 585 586 #endif /* USER_LDT */ 587 588 int 589 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval) 590 { 591 /* { 592 syscallarg(int) func; 593 syscallarg(void *) ptr; 594 syscallarg(size_t) bytecount; 595 } */ 596 597 switch (SCARG(uap, func)) { 598 #ifdef USER_LDT 599 case 0: 600 return linux_read_ldt(l, (const void *)uap, retval); 601 case 1: 602 return linux_write_ldt(l, (const void *)uap, 1); 603 case 2: 604 #ifdef notyet 605 return linux_read_default_ldt(l, (const void *)uap, retval); 606 #else 607 return (ENOSYS); 608 #endif 609 case 0x11: 610 return linux_write_ldt(l, (const void *)uap, 0); 611 #endif /* USER_LDT */ 612 613 default: 614 return (ENOSYS); 615 } 616 } 617 618 /* 619 * XXX Pathetic hack to make svgalib work. This will fake the major 620 * device number of an opened VT so that svgalib likes it. grmbl. 621 * Should probably do it 'wrong the right way' and use a mapping 622 * array for all major device numbers, and map linux_mknod too. 623 */ 624 dev_t 625 linux_fakedev(dev_t dev, int raw) 626 { 627 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 628 const struct cdevsw *cd = cdevsw_lookup(dev); 629 630 if (raw) { 631 #if (NWSDISPLAY > 0) 632 extern const struct cdevsw wsdisplay_cdevsw; 633 if (cd == &wsdisplay_cdevsw) 634 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 635 #endif 636 } 637 638 if (cd == &ptc_cdevsw) 639 return makedev(LINUX_PTC_MAJOR, minor(dev)); 640 if (cd == &pts_cdevsw) 641 return makedev(LINUX_PTS_MAJOR, minor(dev)); 642 643 return dev; 644 } 645 646 #if (NWSDISPLAY > 0) 647 /* 648 * That's not complete, but enough to get an X server running. 649 */ 650 #define NR_KEYS 128 651 static const u_short plain_map[NR_KEYS] = { 652 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 653 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009, 654 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 655 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73, 656 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b, 657 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76, 658 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c, 659 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 660 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307, 661 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 662 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a, 663 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 664 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 665 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 666 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 667 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 668 }, shift_map[NR_KEYS] = { 669 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e, 670 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009, 671 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49, 672 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53, 673 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a, 674 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56, 675 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c, 676 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e, 677 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307, 678 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 679 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a, 680 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 681 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 682 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116, 683 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 684 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 685 }, altgr_map[NR_KEYS] = { 686 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200, 687 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200, 688 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 689 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73, 690 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200, 691 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76, 692 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c, 693 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510, 694 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911, 695 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b, 696 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516, 697 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 698 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 699 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 700 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 701 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 702 }, ctrl_map[NR_KEYS] = { 703 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e, 704 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200, 705 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009, 706 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013, 707 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200, 708 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016, 709 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c, 710 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 711 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307, 712 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 713 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a, 714 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 715 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 716 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 717 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 718 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 719 }; 720 721 const u_short * const linux_keytabs[] = { 722 plain_map, shift_map, altgr_map, altgr_map, ctrl_map 723 }; 724 #endif 725 726 static struct biosdisk_info * 727 fd2biosinfo(struct proc *p, struct file *fp) 728 { 729 struct vnode *vp; 730 const char *blkname; 731 char diskname[16]; 732 int i; 733 struct nativedisk_info *nip; 734 struct disklist *dl = x86_alldisks; 735 736 if (dl == NULL) 737 return NULL; 738 if (fp->f_type != DTYPE_VNODE) 739 return NULL; 740 vp = (struct vnode *)fp->f_data; 741 742 if (vp->v_type != VBLK) 743 return NULL; 744 745 blkname = devsw_blk2name(major(vp->v_rdev)); 746 snprintf(diskname, sizeof diskname, "%s%llu", blkname, 747 (unsigned long long)DISKUNIT(vp->v_rdev)); 748 749 for (i = 0; i < dl->dl_nnativedisks; i++) { 750 nip = &dl->dl_nativedisks[i]; 751 if (strcmp(diskname, nip->ni_devname)) 752 continue; 753 if (nip->ni_nmatches != 0) 754 return &dl->dl_biosdisks[nip->ni_biosmatches[0]]; 755 } 756 757 return NULL; 758 } 759 760 761 /* 762 * We come here in a last attempt to satisfy a Linux ioctl() call 763 */ 764 int 765 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval) 766 { 767 /* { 768 syscallarg(int) fd; 769 syscallarg(u_long) com; 770 syscallarg(void *) data; 771 } */ 772 struct sys_ioctl_args bia; 773 u_long com; 774 int error, error1; 775 #if (NWSDISPLAY > 0) 776 struct vt_mode lvt; 777 struct kbentry kbe; 778 #endif 779 struct linux_hd_geometry hdg; 780 struct linux_hd_big_geometry hdg_big; 781 struct biosdisk_info *bip; 782 file_t *fp; 783 int fd; 784 struct disklabel label; 785 struct partinfo partp; 786 int (*ioctlf)(struct file *, u_long, void *); 787 u_long start, biostotal, realtotal; 788 u_char heads, sectors; 789 u_int cylinders; 790 struct ioctl_pt pt; 791 792 fd = SCARG(uap, fd); 793 SCARG(&bia, fd) = fd; 794 SCARG(&bia, data) = SCARG(uap, data); 795 com = SCARG(uap, com); 796 797 if ((fp = fd_getfile(fd)) == NULL) 798 return (EBADF); 799 800 switch (com) { 801 #if (NWSDISPLAY > 0) 802 case LINUX_KDGKBMODE: 803 com = KDGKBMODE; 804 break; 805 case LINUX_KDSKBMODE: 806 com = KDSKBMODE; 807 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW) 808 SCARG(&bia, data) = (void *)K_RAW; 809 break; 810 case LINUX_KIOCSOUND: 811 SCARG(&bia, data) = 812 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff); 813 /* fall through */ 814 case LINUX_KDMKTONE: 815 com = KDMKTONE; 816 break; 817 case LINUX_KDSETMODE: 818 com = KDSETMODE; 819 break; 820 case LINUX_KDGETMODE: 821 /* KD_* values are equal to the wscons numbers */ 822 com = WSDISPLAYIO_GMODE; 823 break; 824 case LINUX_KDENABIO: 825 com = KDENABIO; 826 break; 827 case LINUX_KDDISABIO: 828 com = KDDISABIO; 829 break; 830 case LINUX_KDGETLED: 831 com = KDGETLED; 832 break; 833 case LINUX_KDSETLED: 834 com = KDSETLED; 835 break; 836 case LINUX_VT_OPENQRY: 837 com = VT_OPENQRY; 838 break; 839 case LINUX_VT_GETMODE: 840 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt); 841 if (error != 0) 842 goto out; 843 lvt.relsig = native_to_linux_signo[lvt.relsig]; 844 lvt.acqsig = native_to_linux_signo[lvt.acqsig]; 845 lvt.frsig = native_to_linux_signo[lvt.frsig]; 846 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt)); 847 goto out; 848 case LINUX_VT_SETMODE: 849 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt)); 850 if (error != 0) 851 goto out; 852 lvt.relsig = linux_to_native_signo[lvt.relsig]; 853 lvt.acqsig = linux_to_native_signo[lvt.acqsig]; 854 lvt.frsig = linux_to_native_signo[lvt.frsig]; 855 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt); 856 goto out; 857 case LINUX_VT_DISALLOCATE: 858 /* XXX should use WSDISPLAYIO_DELSCREEN */ 859 error = 0; 860 goto out; 861 case LINUX_VT_RELDISP: 862 com = VT_RELDISP; 863 break; 864 case LINUX_VT_ACTIVATE: 865 com = VT_ACTIVATE; 866 break; 867 case LINUX_VT_WAITACTIVE: 868 com = VT_WAITACTIVE; 869 break; 870 case LINUX_VT_GETSTATE: 871 com = VT_GETSTATE; 872 break; 873 case LINUX_KDGKBTYPE: 874 { 875 static const u_int8_t kb101 = KB_101; 876 877 /* This is what Linux does. */ 878 error = copyout(&kb101, SCARG(uap, data), 1); 879 goto out; 880 } 881 case LINUX_KDGKBENT: 882 /* 883 * The Linux KDGKBENT ioctl is different from the 884 * SYSV original. So we handle it in machdep code. 885 * XXX We should use keyboard mapping information 886 * from wsdisplay, but this would be expensive. 887 */ 888 if ((error = copyin(SCARG(uap, data), &kbe, 889 sizeof(struct kbentry)))) 890 goto out; 891 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *) 892 || kbe.kb_index >= NR_KEYS) { 893 error = EINVAL; 894 goto out; 895 } 896 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index]; 897 error = copyout(&kbe, SCARG(uap, data), 898 sizeof(struct kbentry)); 899 goto out; 900 #endif 901 case LINUX_HDIO_GETGEO: 902 case LINUX_HDIO_GETGEO_BIG: 903 /* 904 * Try to mimic Linux behaviour: return the BIOS geometry 905 * if possible (extending its # of cylinders if it's beyond 906 * the 1023 limit), fall back to the MI geometry (i.e. 907 * the real geometry) if not found, by returning an 908 * error. See common/linux_hdio.c 909 */ 910 bip = fd2biosinfo(curproc, fp); 911 ioctlf = fp->f_ops->fo_ioctl; 912 error = ioctlf(fp, DIOCGDINFO, (void *)&label); 913 error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp); 914 if (error != 0 && error1 != 0) { 915 error = error1; 916 goto out; 917 } 918 start = error1 != 0 ? partp.pi_offset : 0; 919 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0 920 && bip->bi_cyl != 0) { 921 heads = bip->bi_head; 922 sectors = bip->bi_sec; 923 cylinders = bip->bi_cyl; 924 biostotal = heads * sectors * cylinders; 925 realtotal = label.d_ntracks * label.d_nsectors * 926 label.d_ncylinders; 927 if (realtotal > biostotal) 928 cylinders = realtotal / (heads * sectors); 929 } else { 930 heads = label.d_ntracks; 931 cylinders = label.d_ncylinders; 932 sectors = label.d_nsectors; 933 } 934 if (com == LINUX_HDIO_GETGEO) { 935 hdg.start = start; 936 hdg.heads = heads; 937 hdg.cylinders = cylinders; 938 hdg.sectors = sectors; 939 error = copyout(&hdg, SCARG(uap, data), sizeof hdg); 940 goto out; 941 } else { 942 hdg_big.start = start; 943 hdg_big.heads = heads; 944 hdg_big.cylinders = cylinders; 945 hdg_big.sectors = sectors; 946 error = copyout(&hdg_big, SCARG(uap, data), 947 sizeof hdg_big); 948 goto out; 949 } 950 951 default: 952 /* 953 * Unknown to us. If it's on a device, just pass it through 954 * using PTIOCLINUX, the device itself might be able to 955 * make some sense of it. 956 * XXX hack: if the function returns EJUSTRETURN, 957 * it has stuffed a sysctl return value in pt.data. 958 */ 959 ioctlf = fp->f_ops->fo_ioctl; 960 pt.com = SCARG(uap, com); 961 pt.data = SCARG(uap, data); 962 error = ioctlf(fp, PTIOCLINUX, &pt); 963 if (error == EJUSTRETURN) { 964 retval[0] = (register_t)pt.data; 965 error = 0; 966 } 967 968 if (error == ENOTTY) { 969 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n", 970 com)); 971 } 972 goto out; 973 } 974 SCARG(&bia, com) = com; 975 error = sys_ioctl(curlwp, &bia, retval); 976 out: 977 fd_putfile(fd); 978 return error; 979 } 980 981 /* 982 * Set I/O permissions for a process. Just set the maximum level 983 * right away (ignoring the argument), otherwise we would have 984 * to rely on I/O permission maps, which are not implemented. 985 */ 986 int 987 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval) 988 { 989 /* { 990 syscallarg(int) level; 991 } */ 992 struct trapframe *fp = l->l_md.md_regs; 993 994 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL, 995 NULL, NULL, NULL, NULL) != 0) 996 return EPERM; 997 fp->tf_eflags |= PSL_IOPL; 998 *retval = 0; 999 return 0; 1000 } 1001 1002 /* 1003 * See above. If a root process tries to set access to an I/O port, 1004 * just let it have the whole range. 1005 */ 1006 int 1007 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval) 1008 { 1009 /* { 1010 syscallarg(unsigned int) lo; 1011 syscallarg(unsigned int) hi; 1012 syscallarg(int) val; 1013 } */ 1014 struct trapframe *fp = l->l_md.md_regs; 1015 1016 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ? 1017 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL, 1018 NULL, NULL) != 0) 1019 return EPERM; 1020 if (SCARG(uap, val)) 1021 fp->tf_eflags |= PSL_IOPL; 1022 *retval = 0; 1023 return 0; 1024 } 1025 1026 int 1027 linux_usertrap(struct lwp *l, vaddr_t trapaddr, 1028 void *arg) 1029 { 1030 return 0; 1031 } 1032 1033 const char * 1034 linux_get_uname_arch(void) 1035 { 1036 static char uname_arch[5] = "i386"; 1037 1038 if (uname_arch[1] == '3') 1039 uname_arch[1] += cpu_class; 1040 return uname_arch; 1041 } 1042