1 /* $NetBSD: linux_machdep.c,v 1.169 2021/11/01 05:07:16 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Frank van der Linden, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.169 2021/11/01 05:07:16 thorpej Exp $"); 34 35 #if defined(_KERNEL_OPT) 36 #include "opt_user_ldt.h" 37 #endif 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/signalvar.h> 42 #include <sys/kernel.h> 43 #include <sys/proc.h> 44 #include <sys/buf.h> 45 #include <sys/reboot.h> 46 #include <sys/conf.h> 47 #include <sys/exec.h> 48 #include <sys/file.h> 49 #include <sys/callout.h> 50 #include <sys/mbuf.h> 51 #include <sys/msgbuf.h> 52 #include <sys/mount.h> 53 #include <sys/vnode.h> 54 #include <sys/device.h> 55 #include <sys/syscallargs.h> 56 #include <sys/filedesc.h> 57 #include <sys/exec_elf.h> 58 #include <sys/disklabel.h> 59 #include <sys/ioctl.h> 60 #include <sys/wait.h> 61 #include <sys/kauth.h> 62 #include <sys/kmem.h> 63 64 #include <miscfs/specfs/specdev.h> 65 66 #include <compat/linux/common/linux_types.h> 67 #include <compat/linux/common/linux_signal.h> 68 #include <compat/linux/common/linux_util.h> 69 #include <compat/linux/common/linux_ioctl.h> 70 #include <compat/linux/common/linux_hdio.h> 71 #include <compat/linux/common/linux_exec.h> 72 #include <compat/linux/common/linux_machdep.h> 73 #include <compat/linux/common/linux_errno.h> 74 75 #include <compat/linux/linux_syscallargs.h> 76 77 #include <sys/cpu.h> 78 #include <machine/cpufunc.h> 79 #include <machine/psl.h> 80 #include <machine/reg.h> 81 #include <machine/segments.h> 82 #include <machine/specialreg.h> 83 #include <machine/sysarch.h> 84 #include <machine/vmparam.h> 85 86 #include <x86/fpu.h> 87 88 /* 89 * To see whether wscons is configured (for virtual console ioctl calls). 90 */ 91 #if defined(_KERNEL_OPT) 92 #include "wsdisplay.h" 93 #endif 94 #if (NWSDISPLAY > 0) 95 #include <dev/wscons/wsconsio.h> 96 #include <dev/wscons/wsdisplay_usl_io.h> 97 #if defined(_KERNEL_OPT) 98 #include "opt_xserver.h" 99 #endif 100 #endif 101 102 #ifdef DEBUG_LINUX 103 #define DPRINTF(a) uprintf a 104 #else 105 #define DPRINTF(a) 106 #endif 107 108 extern struct disklist *x86_alldisks; 109 110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *); 111 static void linux_save_ucontext(struct lwp *, struct trapframe *, 112 const sigset_t *, stack_t *, struct linux_ucontext *); 113 static void linux_save_sigcontext(struct lwp *, struct trapframe *, 114 const sigset_t *, struct linux_sigcontext *); 115 static int linux_restore_sigcontext(struct lwp *, 116 struct linux_sigcontext *, register_t *); 117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *); 118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *); 119 120 extern char linux_sigcode[], linux_rt_sigcode[]; 121 122 /* 123 * Deal with some i386-specific things in the Linux emulation code. 124 */ 125 126 void 127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack) 128 { 129 struct trapframe *tf; 130 131 #ifdef USER_LDT 132 pmap_ldt_cleanup(l); 133 #endif 134 135 fpu_clear(l, __Linux_NPXCW__); 136 137 tf = l->l_md.md_regs; 138 tf->tf_gs = 0; 139 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 140 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 141 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 142 tf->tf_edi = 0; 143 tf->tf_esi = 0; 144 tf->tf_ebp = 0; 145 tf->tf_ebx = l->l_proc->p_psstrp; 146 tf->tf_edx = 0; 147 tf->tf_ecx = 0; 148 tf->tf_eax = 0; 149 tf->tf_eip = epp->ep_entry; 150 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 151 tf->tf_eflags = PSL_USERSET; 152 tf->tf_esp = stack; 153 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 154 } 155 156 /* 157 * Send an interrupt to process. 158 * 159 * Stack is set up to allow sigcode stored 160 * in u. to call routine, followed by kcall 161 * to sigreturn routine below. After sigreturn 162 * resets the signal mask, the stack, and the 163 * frame pointer, it returns to the user 164 * specified pc, psl. 165 */ 166 167 void 168 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 169 { 170 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO) 171 linux_rt_sendsig(ksi, mask); 172 else 173 linux_old_sendsig(ksi, mask); 174 } 175 176 177 static void 178 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, 179 stack_t *sas, struct linux_ucontext *uc) 180 { 181 uc->uc_flags = 0; 182 uc->uc_link = NULL; 183 native_to_linux_sigaltstack(&uc->uc_stack, sas); 184 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext); 185 native_to_linux_sigset(&uc->uc_sigmask, mask); 186 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem)); 187 } 188 189 static void 190 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, 191 const sigset_t *mask, struct linux_sigcontext *sc) 192 { 193 struct pcb *pcb = lwp_getpcb(l); 194 195 /* Save register context. */ 196 sc->sc_gs = tf->tf_gs; 197 sc->sc_fs = tf->tf_fs; 198 sc->sc_es = tf->tf_es; 199 sc->sc_ds = tf->tf_ds; 200 sc->sc_eflags = tf->tf_eflags; 201 202 sc->sc_edi = tf->tf_edi; 203 sc->sc_esi = tf->tf_esi; 204 sc->sc_esp = tf->tf_esp; 205 sc->sc_ebp = tf->tf_ebp; 206 sc->sc_ebx = tf->tf_ebx; 207 sc->sc_edx = tf->tf_edx; 208 sc->sc_ecx = tf->tf_ecx; 209 sc->sc_eax = tf->tf_eax; 210 sc->sc_eip = tf->tf_eip; 211 sc->sc_cs = tf->tf_cs; 212 sc->sc_esp_at_signal = tf->tf_esp; 213 sc->sc_ss = tf->tf_ss; 214 sc->sc_err = tf->tf_err; 215 sc->sc_trapno = tf->tf_trapno; 216 sc->sc_cr2 = pcb->pcb_cr2; 217 sc->sc_387 = NULL; 218 219 /* Save signal stack. */ 220 /* Linux doesn't save the onstack flag in sigframe */ 221 222 /* Save signal mask. */ 223 native_to_linux_old_sigset(&sc->sc_mask, mask); 224 } 225 226 static void 227 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 228 { 229 struct lwp *l = curlwp; 230 struct proc *p = l->l_proc; 231 struct trapframe *tf; 232 struct linux_rt_sigframe *fp, frame; 233 int onstack, error; 234 int sig = ksi->ksi_signo; 235 sig_t catcher = SIGACTION(p, sig).sa_handler; 236 stack_t *sas = &l->l_sigstk; 237 238 tf = l->l_md.md_regs; 239 /* Do we need to jump onto the signal stack? */ 240 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 241 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 242 243 244 /* Allocate space for the signal handler context. */ 245 if (onstack) 246 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp + 247 sas->ss_size); 248 else 249 fp = (struct linux_rt_sigframe *)tf->tf_esp; 250 fp--; 251 252 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 253 onstack, fp, sig, tf->tf_eip, 254 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 255 256 memset(&frame, 0, sizeof(frame)); 257 258 /* Build stack frame for signal trampoline. */ 259 frame.sf_handler = catcher; 260 frame.sf_sig = native_to_linux_signo[sig]; 261 frame.sf_sip = &fp->sf_si; 262 frame.sf_ucp = &fp->sf_uc; 263 264 /* 265 * XXX: the following code assumes that the constants for 266 * siginfo are the same between linux and NetBSD. 267 */ 268 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info); 269 270 /* Save register context. */ 271 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc); 272 sendsig_reset(l, sig); 273 274 mutex_exit(p->p_lock); 275 error = copyout(&frame, fp, sizeof(frame)); 276 mutex_enter(p->p_lock); 277 278 if (error != 0) { 279 /* 280 * Process has trashed its stack; give it an illegal 281 * instruction to halt it in its tracks. 282 */ 283 sigexit(l, SIGILL); 284 /* NOTREACHED */ 285 } 286 287 /* 288 * Build context to run handler in. 289 */ 290 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 291 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 292 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 293 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) + 294 (linux_rt_sigcode - linux_sigcode); 295 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 296 tf->tf_eflags &= ~PSL_CLEARSIG; 297 tf->tf_esp = (int)fp; 298 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 299 300 /* Remember that we're now on the signal stack. */ 301 if (onstack) 302 sas->ss_flags |= SS_ONSTACK; 303 } 304 305 static void 306 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask) 307 { 308 struct lwp *l = curlwp; 309 struct proc *p = l->l_proc; 310 struct trapframe *tf; 311 struct linux_sigframe *fp, frame; 312 int onstack, error; 313 int sig = ksi->ksi_signo; 314 sig_t catcher = SIGACTION(p, sig).sa_handler; 315 stack_t *sas = &l->l_sigstk; 316 317 tf = l->l_md.md_regs; 318 319 /* Do we need to jump onto the signal stack? */ 320 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 321 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 322 323 /* Allocate space for the signal handler context. */ 324 if (onstack) 325 fp = (struct linux_sigframe *) ((char *)sas->ss_sp + 326 sas->ss_size); 327 else 328 fp = (struct linux_sigframe *)tf->tf_esp; 329 fp--; 330 331 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n", 332 onstack, fp, sig, tf->tf_eip, 333 ((struct pcb *)lwp_getpcb(l))->pcb_cr2)); 334 335 memset(&frame, 0, sizeof(frame)); 336 337 /* Build stack frame for signal trampoline. */ 338 frame.sf_handler = catcher; 339 frame.sf_sig = native_to_linux_signo[sig]; 340 341 linux_save_sigcontext(l, tf, mask, &frame.sf_sc); 342 sendsig_reset(l, sig); 343 344 mutex_exit(p->p_lock); 345 error = copyout(&frame, fp, sizeof(frame)); 346 mutex_enter(p->p_lock); 347 348 if (error != 0) { 349 /* 350 * Process has trashed its stack; give it an illegal 351 * instruction to halt it in its tracks. 352 */ 353 sigexit(l, SIGILL); 354 /* NOTREACHED */ 355 } 356 357 /* 358 * Build context to run handler in. 359 */ 360 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 361 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 362 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 363 tf->tf_eip = (int)p->p_sigctx.ps_sigcode; 364 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 365 tf->tf_eflags &= ~PSL_CLEARSIG; 366 tf->tf_esp = (int)fp; 367 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 368 369 /* Remember that we're now on the signal stack. */ 370 if (onstack) 371 sas->ss_flags |= SS_ONSTACK; 372 } 373 374 /* 375 * System call to cleanup state after a signal 376 * has been taken. Reset signal mask and 377 * stack state from context left by sendsig (above). 378 * Return to previous pc and psl as specified by 379 * context left by sendsig. Check carefully to 380 * make sure that the user has not modified the 381 * psl to gain improper privileges or to cause 382 * a machine fault. 383 */ 384 int 385 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval) 386 { 387 /* { 388 syscallarg(struct linux_ucontext *) ucp; 389 } */ 390 struct linux_ucontext context, *ucp = SCARG(uap, ucp); 391 int error; 392 393 /* 394 * The trampoline code hands us the context. 395 * It is unsafe to keep track of it ourselves, in the event that a 396 * program jumps out of a signal handler. 397 */ 398 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0) 399 return error; 400 401 /* XXX XAX we can do better here by using more of the ucontext */ 402 return linux_restore_sigcontext(l, &context.uc_mcontext, retval); 403 } 404 405 int 406 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval) 407 { 408 /* { 409 syscallarg(struct linux_sigcontext *) scp; 410 } */ 411 struct linux_sigcontext context, *scp = SCARG(uap, scp); 412 int error; 413 414 /* 415 * The trampoline code hands us the context. 416 * It is unsafe to keep track of it ourselves, in the event that a 417 * program jumps out of a signal handler. 418 */ 419 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0) 420 return error; 421 return linux_restore_sigcontext(l, &context, retval); 422 } 423 424 static int 425 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp, 426 register_t *retval) 427 { 428 struct proc *p = l->l_proc; 429 stack_t *sas = &l->l_sigstk; 430 struct trapframe *tf; 431 sigset_t mask; 432 ssize_t ss_gap; 433 434 /* Restore register context. */ 435 tf = l->l_md.md_regs; 436 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 437 438 /* 439 * Check for security violations. If we're returning to 440 * protected mode, the CPU will validate the segment registers 441 * automatically and generate a trap on violations. We handle 442 * the trap, rather than doing all of the checking here. 443 */ 444 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 || 445 !USERMODE(scp->sc_cs)) 446 return EINVAL; 447 448 tf->tf_gs = scp->sc_gs; 449 tf->tf_fs = scp->sc_fs; 450 tf->tf_es = scp->sc_es; 451 tf->tf_ds = scp->sc_ds; 452 tf->tf_eflags = scp->sc_eflags; 453 454 tf->tf_edi = scp->sc_edi; 455 tf->tf_esi = scp->sc_esi; 456 tf->tf_ebp = scp->sc_ebp; 457 tf->tf_ebx = scp->sc_ebx; 458 tf->tf_edx = scp->sc_edx; 459 tf->tf_ecx = scp->sc_ecx; 460 tf->tf_eax = scp->sc_eax; 461 tf->tf_eip = scp->sc_eip; 462 tf->tf_cs = scp->sc_cs; 463 tf->tf_esp = scp->sc_esp_at_signal; 464 tf->tf_ss = scp->sc_ss; 465 466 /* Restore signal stack. */ 467 /* 468 * Linux really does it this way; it doesn't have space in sigframe 469 * to save the onstack flag. 470 */ 471 mutex_enter(p->p_lock); 472 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp); 473 if (ss_gap >= 0 && ss_gap < sas->ss_size) 474 sas->ss_flags |= SS_ONSTACK; 475 else 476 sas->ss_flags &= ~SS_ONSTACK; 477 478 /* Restore signal mask. */ 479 linux_old_to_native_sigset(&mask, &scp->sc_mask); 480 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0); 481 mutex_exit(p->p_lock); 482 483 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip)); 484 return EJUSTRETURN; 485 } 486 487 #ifdef USER_LDT 488 489 static int 490 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 491 register_t *retval) 492 { 493 struct x86_get_ldt_args gl; 494 int error; 495 union descriptor *ldt_buf; 496 size_t sz; 497 498 /* 499 * I've checked the linux code - this function is asymmetric with 500 * linux_write_ldt, and returns raw ldt entries. 501 * NB, the code I saw zerod the spare parts of the user buffer. 502 */ 503 504 DPRINTF(("linux_read_ldt!")); 505 506 sz = 8192 * sizeof(*ldt_buf); 507 ldt_buf = kmem_zalloc(sz, KM_SLEEP); 508 gl.start = 0; 509 gl.desc = NULL; 510 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor); 511 error = x86_get_ldt1(l, &gl, ldt_buf); 512 /* NB gl.num might have changed */ 513 if (error == 0) { 514 *retval = gl.num * sizeof(*ldtstore); 515 error = copyout(ldt_buf, SCARG(uap, ptr), 516 gl.num * sizeof *ldt_buf); 517 } 518 kmem_free(ldt_buf, sz); 519 520 return error; 521 } 522 523 struct linux_ldt_info { 524 u_int entry_number; 525 u_long base_addr; 526 u_int limit; 527 u_int seg_32bit:1; 528 u_int contents:2; 529 u_int read_exec_only:1; 530 u_int limit_in_pages:1; 531 u_int seg_not_present:1; 532 u_int useable:1; 533 }; 534 535 static int 536 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, 537 int oldmode) 538 { 539 struct linux_ldt_info ldt_info; 540 union descriptor d; 541 struct x86_set_ldt_args sl; 542 int error; 543 544 DPRINTF(("linux_write_ldt %d\n", oldmode)); 545 if (SCARG(uap, bytecount) != sizeof(ldt_info)) 546 return (EINVAL); 547 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0) 548 return error; 549 if (ldt_info.entry_number >= 8192) 550 return (EINVAL); 551 if (ldt_info.contents == 3) { 552 if (oldmode) 553 return (EINVAL); 554 if (ldt_info.seg_not_present) 555 return (EINVAL); 556 } 557 558 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 && 559 (oldmode || (ldt_info.contents == 0 && 560 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 && 561 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 && 562 ldt_info.useable == 0))) { 563 /* this means you should zero the ldt */ 564 (void)memset(&d, 0, sizeof(d)); 565 } else { 566 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff; 567 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff; 568 d.sd.sd_lolimit = ldt_info.limit & 0xffff; 569 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf; 570 d.sd.sd_type = 16 | (ldt_info.contents << 2) | 571 (!ldt_info.read_exec_only << 1); 572 d.sd.sd_dpl = SEL_UPL; 573 d.sd.sd_p = !ldt_info.seg_not_present; 574 d.sd.sd_def32 = ldt_info.seg_32bit; 575 d.sd.sd_gran = ldt_info.limit_in_pages; 576 if (!oldmode) 577 d.sd.sd_xx = ldt_info.useable; 578 else 579 d.sd.sd_xx = 0; 580 } 581 sl.start = ldt_info.entry_number; 582 sl.desc = NULL; 583 sl.num = 1; 584 585 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n", 586 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit)); 587 588 return x86_set_ldt1(l, &sl, &d); 589 } 590 591 #endif /* USER_LDT */ 592 593 int 594 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval) 595 { 596 /* { 597 syscallarg(int) func; 598 syscallarg(void *) ptr; 599 syscallarg(size_t) bytecount; 600 } */ 601 602 switch (SCARG(uap, func)) { 603 #ifdef USER_LDT 604 case 0: 605 return linux_read_ldt(l, (const void *)uap, retval); 606 case 1: 607 return linux_write_ldt(l, (const void *)uap, 1); 608 case 2: 609 #ifdef notyet 610 return linux_read_default_ldt(l, (const void *)uap, retval); 611 #else 612 return (ENOSYS); 613 #endif 614 case 0x11: 615 return linux_write_ldt(l, (const void *)uap, 0); 616 #endif /* USER_LDT */ 617 618 default: 619 return (ENOSYS); 620 } 621 } 622 623 /* 624 * XXX Pathetic hack to make svgalib work. This will fake the major 625 * device number of an opened VT so that svgalib likes it. grmbl. 626 * Should probably do it 'wrong the right way' and use a mapping 627 * array for all major device numbers, and map linux_mknod too. 628 */ 629 dev_t 630 linux_fakedev(dev_t dev, int raw) 631 { 632 extern const struct cdevsw ptc_cdevsw, pts_cdevsw; 633 const struct cdevsw *cd = cdevsw_lookup(dev); 634 635 if (raw) { 636 #if (NWSDISPLAY > 0) 637 extern const struct cdevsw wsdisplay_cdevsw; 638 if (cd == &wsdisplay_cdevsw) 639 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1)); 640 #endif 641 } 642 643 if (cd == &ptc_cdevsw) 644 return makedev(LINUX_PTC_MAJOR, minor(dev)); 645 if (cd == &pts_cdevsw) 646 return makedev(LINUX_PTS_MAJOR, minor(dev)); 647 648 return dev; 649 } 650 651 #if (NWSDISPLAY > 0) 652 /* 653 * That's not complete, but enough to get an X server running. 654 */ 655 #define NR_KEYS 128 656 static const u_short plain_map[NR_KEYS] = { 657 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 658 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009, 659 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 660 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73, 661 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b, 662 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76, 663 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c, 664 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 665 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307, 666 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 667 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a, 668 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 669 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 670 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 671 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 672 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 673 }, shift_map[NR_KEYS] = { 674 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e, 675 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009, 676 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49, 677 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53, 678 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a, 679 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56, 680 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c, 681 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e, 682 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307, 683 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 684 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a, 685 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 686 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 687 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116, 688 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 689 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 690 }, altgr_map[NR_KEYS] = { 691 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200, 692 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200, 693 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69, 694 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73, 695 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200, 696 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76, 697 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c, 698 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510, 699 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911, 700 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b, 701 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516, 702 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 703 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603, 704 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 705 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 706 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 707 }, ctrl_map[NR_KEYS] = { 708 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e, 709 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200, 710 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009, 711 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013, 712 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200, 713 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016, 714 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c, 715 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 716 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307, 717 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301, 718 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a, 719 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 720 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603, 721 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116, 722 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d, 723 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 724 }; 725 726 const u_short * const linux_keytabs[] = { 727 plain_map, shift_map, altgr_map, altgr_map, ctrl_map 728 }; 729 #endif 730 731 static struct biosdisk_info * 732 fd2biosinfo(struct proc *p, struct file *fp) 733 { 734 struct vnode *vp; 735 const char *blkname; 736 char diskname[16]; 737 int i; 738 struct nativedisk_info *nip; 739 struct disklist *dl = x86_alldisks; 740 741 if (dl == NULL) 742 return NULL; 743 if (fp->f_type != DTYPE_VNODE) 744 return NULL; 745 vp = (struct vnode *)fp->f_data; 746 747 if (vp->v_type != VBLK) 748 return NULL; 749 750 blkname = devsw_blk2name(major(vp->v_rdev)); 751 snprintf(diskname, sizeof diskname, "%s%llu", blkname, 752 (unsigned long long)DISKUNIT(vp->v_rdev)); 753 754 for (i = 0; i < dl->dl_nnativedisks; i++) { 755 nip = &dl->dl_nativedisks[i]; 756 if (strcmp(diskname, nip->ni_devname)) 757 continue; 758 if (nip->ni_nmatches != 0) 759 return &dl->dl_biosdisks[nip->ni_biosmatches[0]]; 760 } 761 762 return NULL; 763 } 764 765 766 /* 767 * We come here in a last attempt to satisfy a Linux ioctl() call 768 */ 769 int 770 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval) 771 { 772 /* { 773 syscallarg(int) fd; 774 syscallarg(u_long) com; 775 syscallarg(void *) data; 776 } */ 777 struct sys_ioctl_args bia; 778 u_long com; 779 int error, error1; 780 #if (NWSDISPLAY > 0) 781 struct vt_mode lvt; 782 struct kbentry kbe; 783 #endif 784 struct linux_hd_geometry hdg; 785 struct linux_hd_big_geometry hdg_big; 786 struct biosdisk_info *bip; 787 file_t *fp; 788 int fd; 789 struct disklabel label; 790 struct partinfo partp; 791 int (*ioctlf)(struct file *, u_long, void *); 792 u_long start, biostotal, realtotal; 793 u_char heads, sectors; 794 u_int cylinders; 795 struct ioctl_pt pt; 796 797 fd = SCARG(uap, fd); 798 SCARG(&bia, fd) = fd; 799 SCARG(&bia, data) = SCARG(uap, data); 800 com = SCARG(uap, com); 801 802 if ((fp = fd_getfile(fd)) == NULL) 803 return (EBADF); 804 805 switch (com) { 806 #if (NWSDISPLAY > 0) 807 case LINUX_KDGKBMODE: 808 com = KDGKBMODE; 809 break; 810 case LINUX_KDSKBMODE: 811 com = KDSKBMODE; 812 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW) 813 SCARG(&bia, data) = (void *)K_RAW; 814 break; 815 case LINUX_KIOCSOUND: 816 SCARG(&bia, data) = 817 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff); 818 /* fall through */ 819 case LINUX_KDMKTONE: 820 com = KDMKTONE; 821 break; 822 case LINUX_KDSETMODE: 823 com = KDSETMODE; 824 break; 825 case LINUX_KDGETMODE: 826 /* KD_* values are equal to the wscons numbers */ 827 com = WSDISPLAYIO_GMODE; 828 break; 829 case LINUX_KDENABIO: 830 com = KDENABIO; 831 break; 832 case LINUX_KDDISABIO: 833 com = KDDISABIO; 834 break; 835 case LINUX_KDGETLED: 836 com = KDGETLED; 837 break; 838 case LINUX_KDSETLED: 839 com = KDSETLED; 840 break; 841 case LINUX_VT_OPENQRY: 842 com = VT_OPENQRY; 843 break; 844 case LINUX_VT_GETMODE: 845 memset(&lvt, 0, sizeof(lvt)); 846 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt); 847 if (error != 0) 848 goto out; 849 lvt.relsig = native_to_linux_signo[lvt.relsig]; 850 lvt.acqsig = native_to_linux_signo[lvt.acqsig]; 851 lvt.frsig = native_to_linux_signo[lvt.frsig]; 852 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt)); 853 goto out; 854 case LINUX_VT_SETMODE: 855 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt)); 856 if (error != 0) 857 goto out; 858 lvt.relsig = linux_to_native_signo[lvt.relsig]; 859 lvt.acqsig = linux_to_native_signo[lvt.acqsig]; 860 lvt.frsig = linux_to_native_signo[lvt.frsig]; 861 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt); 862 goto out; 863 case LINUX_VT_DISALLOCATE: 864 /* XXX should use WSDISPLAYIO_DELSCREEN */ 865 error = 0; 866 goto out; 867 case LINUX_VT_RELDISP: 868 com = VT_RELDISP; 869 break; 870 case LINUX_VT_ACTIVATE: 871 com = VT_ACTIVATE; 872 break; 873 case LINUX_VT_WAITACTIVE: 874 com = VT_WAITACTIVE; 875 break; 876 case LINUX_VT_GETSTATE: 877 com = VT_GETSTATE; 878 break; 879 case LINUX_KDGKBTYPE: 880 { 881 static const u_int8_t kb101 = KB_101; 882 883 /* This is what Linux does. */ 884 error = copyout(&kb101, SCARG(uap, data), 1); 885 goto out; 886 } 887 case LINUX_KDGKBENT: 888 /* 889 * The Linux KDGKBENT ioctl is different from the 890 * SYSV original. So we handle it in machdep code. 891 * XXX We should use keyboard mapping information 892 * from wsdisplay, but this would be expensive. 893 */ 894 if ((error = copyin(SCARG(uap, data), &kbe, 895 sizeof(struct kbentry)))) 896 goto out; 897 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *) 898 || kbe.kb_index >= NR_KEYS) { 899 error = EINVAL; 900 goto out; 901 } 902 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index]; 903 error = copyout(&kbe, SCARG(uap, data), 904 sizeof(struct kbentry)); 905 goto out; 906 #endif 907 case LINUX_HDIO_GETGEO: 908 case LINUX_HDIO_GETGEO_BIG: 909 /* 910 * Try to mimic Linux behaviour: return the BIOS geometry 911 * if possible (extending its # of cylinders if it's beyond 912 * the 1023 limit), fall back to the MI geometry (i.e. 913 * the real geometry) if not found, by returning an 914 * error. See common/linux_hdio.c 915 */ 916 bip = fd2biosinfo(curproc, fp); 917 ioctlf = fp->f_ops->fo_ioctl; 918 error = ioctlf(fp, DIOCGDINFO, (void *)&label); 919 error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp); 920 if (error != 0 && error1 != 0) { 921 error = error1; 922 goto out; 923 } 924 start = error1 != 0 ? partp.pi_offset : 0; 925 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0 926 && bip->bi_cyl != 0) { 927 heads = bip->bi_head; 928 sectors = bip->bi_sec; 929 cylinders = bip->bi_cyl; 930 biostotal = heads * sectors * cylinders; 931 realtotal = label.d_ntracks * label.d_nsectors * 932 label.d_ncylinders; 933 if (realtotal > biostotal) 934 cylinders = realtotal / (heads * sectors); 935 } else { 936 heads = label.d_ntracks; 937 cylinders = label.d_ncylinders; 938 sectors = label.d_nsectors; 939 } 940 if (com == LINUX_HDIO_GETGEO) { 941 memset(&hdg, 0, sizeof(hdg)); 942 hdg.start = start; 943 hdg.heads = heads; 944 hdg.cylinders = cylinders; 945 hdg.sectors = sectors; 946 error = copyout(&hdg, SCARG(uap, data), sizeof hdg); 947 goto out; 948 } else { 949 memset(&hdg_big, 0, sizeof(hdg_big)); 950 hdg_big.start = start; 951 hdg_big.heads = heads; 952 hdg_big.cylinders = cylinders; 953 hdg_big.sectors = sectors; 954 error = copyout(&hdg_big, SCARG(uap, data), 955 sizeof hdg_big); 956 goto out; 957 } 958 959 default: 960 /* 961 * Unknown to us. If it's on a device, just pass it through 962 * using PTIOCLINUX, the device itself might be able to 963 * make some sense of it. 964 * XXX hack: if the function returns EJUSTRETURN, 965 * it has stuffed a sysctl return value in pt.data. 966 */ 967 ioctlf = fp->f_ops->fo_ioctl; 968 pt.com = SCARG(uap, com); 969 pt.data = SCARG(uap, data); 970 error = ioctlf(fp, PTIOCLINUX, &pt); 971 if (error == EJUSTRETURN) { 972 retval[0] = (register_t)pt.data; 973 error = 0; 974 } 975 976 if (error == ENOTTY) { 977 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n", 978 com)); 979 } 980 goto out; 981 } 982 SCARG(&bia, com) = com; 983 error = sys_ioctl(curlwp, &bia, retval); 984 out: 985 fd_putfile(fd); 986 return error; 987 } 988 989 /* 990 * Set I/O permissions for a process. Just set the maximum level 991 * right away (ignoring the argument), otherwise we would have 992 * to rely on I/O permission maps, which are not implemented. 993 */ 994 int 995 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval) 996 { 997 /* { 998 syscallarg(int) level; 999 } */ 1000 struct trapframe *fp = l->l_md.md_regs; 1001 1002 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL, 1003 NULL, NULL, NULL, NULL) != 0) 1004 return EPERM; 1005 fp->tf_eflags |= PSL_IOPL; 1006 *retval = 0; 1007 return 0; 1008 } 1009 1010 /* 1011 * See above. If a root process tries to set access to an I/O port, 1012 * just let it have the whole range. 1013 */ 1014 int 1015 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval) 1016 { 1017 /* { 1018 syscallarg(unsigned int) lo; 1019 syscallarg(unsigned int) hi; 1020 syscallarg(int) val; 1021 } */ 1022 struct trapframe *fp = l->l_md.md_regs; 1023 1024 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ? 1025 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL, 1026 NULL, NULL) != 0) 1027 return EPERM; 1028 if (SCARG(uap, val)) 1029 fp->tf_eflags |= PSL_IOPL; 1030 *retval = 0; 1031 return 0; 1032 } 1033 1034 int 1035 linux_usertrap(struct lwp *l, vaddr_t trapaddr, 1036 void *arg) 1037 { 1038 return 0; 1039 } 1040 1041 const char * 1042 linux_get_uname_arch(void) 1043 { 1044 static char uname_arch[5] = "i386"; 1045 1046 if (uname_arch[1] == '3') 1047 uname_arch[1] += cpu_class; 1048 return uname_arch; 1049 } 1050