1 /* $NetBSD: linux_machdep.c,v 1.169 2021/11/01 05:07:16 thorpej Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.169 2021/11/01 05:07:16 thorpej Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_user_ldt.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/signalvar.h>
42 #include <sys/kernel.h>
43 #include <sys/proc.h>
44 #include <sys/buf.h>
45 #include <sys/reboot.h>
46 #include <sys/conf.h>
47 #include <sys/exec.h>
48 #include <sys/file.h>
49 #include <sys/callout.h>
50 #include <sys/mbuf.h>
51 #include <sys/msgbuf.h>
52 #include <sys/mount.h>
53 #include <sys/vnode.h>
54 #include <sys/device.h>
55 #include <sys/syscallargs.h>
56 #include <sys/filedesc.h>
57 #include <sys/exec_elf.h>
58 #include <sys/disklabel.h>
59 #include <sys/ioctl.h>
60 #include <sys/wait.h>
61 #include <sys/kauth.h>
62 #include <sys/kmem.h>
63
64 #include <miscfs/specfs/specdev.h>
65
66 #include <compat/linux/common/linux_types.h>
67 #include <compat/linux/common/linux_signal.h>
68 #include <compat/linux/common/linux_util.h>
69 #include <compat/linux/common/linux_ioctl.h>
70 #include <compat/linux/common/linux_hdio.h>
71 #include <compat/linux/common/linux_exec.h>
72 #include <compat/linux/common/linux_machdep.h>
73 #include <compat/linux/common/linux_errno.h>
74
75 #include <compat/linux/linux_syscallargs.h>
76
77 #include <sys/cpu.h>
78 #include <machine/cpufunc.h>
79 #include <machine/psl.h>
80 #include <machine/reg.h>
81 #include <machine/segments.h>
82 #include <machine/specialreg.h>
83 #include <machine/sysarch.h>
84 #include <machine/vmparam.h>
85
86 #include <x86/fpu.h>
87
88 /*
89 * To see whether wscons is configured (for virtual console ioctl calls).
90 */
91 #if defined(_KERNEL_OPT)
92 #include "wsdisplay.h"
93 #endif
94 #if (NWSDISPLAY > 0)
95 #include <dev/wscons/wsconsio.h>
96 #include <dev/wscons/wsdisplay_usl_io.h>
97 #if defined(_KERNEL_OPT)
98 #include "opt_xserver.h"
99 #endif
100 #endif
101
/*
 * Debug tracing.  Invoke as DPRINTF(("fmt", args...)): the doubled
 * parentheses pass the whole argument list through as a single macro
 * argument.  Expands to a uprintf call when DEBUG_LINUX is defined and
 * to nothing otherwise.
 */
#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a)	uprintf a
#endif
107
108 extern struct disklist *x86_alldisks;
109
110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
112 const sigset_t *, stack_t *, struct linux_ucontext *);
113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct linux_sigcontext *);
115 static int linux_restore_sigcontext(struct lwp *,
116 struct linux_sigcontext *, register_t *);
117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
119
120 extern char linux_sigcode[], linux_rt_sigcode[];
121
122 /*
123 * Deal with some i386-specific things in the Linux emulation code.
124 */
125
126 void
linux_setregs(struct lwp * l,struct exec_package * epp,vaddr_t stack)127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
128 {
129 struct trapframe *tf;
130
131 #ifdef USER_LDT
132 pmap_ldt_cleanup(l);
133 #endif
134
135 fpu_clear(l, __Linux_NPXCW__);
136
137 tf = l->l_md.md_regs;
138 tf->tf_gs = 0;
139 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
140 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
141 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
142 tf->tf_edi = 0;
143 tf->tf_esi = 0;
144 tf->tf_ebp = 0;
145 tf->tf_ebx = l->l_proc->p_psstrp;
146 tf->tf_edx = 0;
147 tf->tf_ecx = 0;
148 tf->tf_eax = 0;
149 tf->tf_eip = epp->ep_entry;
150 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
151 tf->tf_eflags = PSL_USERSET;
152 tf->tf_esp = stack;
153 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
154 }
155
156 /*
157 * Send an interrupt to process.
158 *
159 * Stack is set up to allow sigcode stored
160 * in u. to call routine, followed by kcall
161 * to sigreturn routine below. After sigreturn
162 * resets the signal mask, the stack, and the
163 * frame pointer, it returns to the user
164 * specified pc, psl.
165 */
166
167 void
linux_sendsig(const ksiginfo_t * ksi,const sigset_t * mask)168 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
169 {
170 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
171 linux_rt_sendsig(ksi, mask);
172 else
173 linux_old_sendsig(ksi, mask);
174 }
175
176
177 static void
linux_save_ucontext(struct lwp * l,struct trapframe * tf,const sigset_t * mask,stack_t * sas,struct linux_ucontext * uc)178 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask,
179 stack_t *sas, struct linux_ucontext *uc)
180 {
181 uc->uc_flags = 0;
182 uc->uc_link = NULL;
183 native_to_linux_sigaltstack(&uc->uc_stack, sas);
184 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
185 native_to_linux_sigset(&uc->uc_sigmask, mask);
186 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
187 }
188
189 static void
linux_save_sigcontext(struct lwp * l,struct trapframe * tf,const sigset_t * mask,struct linux_sigcontext * sc)190 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
191 const sigset_t *mask, struct linux_sigcontext *sc)
192 {
193 struct pcb *pcb = lwp_getpcb(l);
194
195 /* Save register context. */
196 sc->sc_gs = tf->tf_gs;
197 sc->sc_fs = tf->tf_fs;
198 sc->sc_es = tf->tf_es;
199 sc->sc_ds = tf->tf_ds;
200 sc->sc_eflags = tf->tf_eflags;
201
202 sc->sc_edi = tf->tf_edi;
203 sc->sc_esi = tf->tf_esi;
204 sc->sc_esp = tf->tf_esp;
205 sc->sc_ebp = tf->tf_ebp;
206 sc->sc_ebx = tf->tf_ebx;
207 sc->sc_edx = tf->tf_edx;
208 sc->sc_ecx = tf->tf_ecx;
209 sc->sc_eax = tf->tf_eax;
210 sc->sc_eip = tf->tf_eip;
211 sc->sc_cs = tf->tf_cs;
212 sc->sc_esp_at_signal = tf->tf_esp;
213 sc->sc_ss = tf->tf_ss;
214 sc->sc_err = tf->tf_err;
215 sc->sc_trapno = tf->tf_trapno;
216 sc->sc_cr2 = pcb->pcb_cr2;
217 sc->sc_387 = NULL;
218
219 /* Save signal stack. */
220 /* Linux doesn't save the onstack flag in sigframe */
221
222 /* Save signal mask. */
223 native_to_linux_old_sigset(&sc->sc_mask, mask);
224 }
225
226 static void
linux_rt_sendsig(const ksiginfo_t * ksi,const sigset_t * mask)227 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
228 {
229 struct lwp *l = curlwp;
230 struct proc *p = l->l_proc;
231 struct trapframe *tf;
232 struct linux_rt_sigframe *fp, frame;
233 int onstack, error;
234 int sig = ksi->ksi_signo;
235 sig_t catcher = SIGACTION(p, sig).sa_handler;
236 stack_t *sas = &l->l_sigstk;
237
238 tf = l->l_md.md_regs;
239 /* Do we need to jump onto the signal stack? */
240 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
241 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
242
243
244 /* Allocate space for the signal handler context. */
245 if (onstack)
246 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
247 sas->ss_size);
248 else
249 fp = (struct linux_rt_sigframe *)tf->tf_esp;
250 fp--;
251
252 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
253 onstack, fp, sig, tf->tf_eip,
254 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
255
256 memset(&frame, 0, sizeof(frame));
257
258 /* Build stack frame for signal trampoline. */
259 frame.sf_handler = catcher;
260 frame.sf_sig = native_to_linux_signo[sig];
261 frame.sf_sip = &fp->sf_si;
262 frame.sf_ucp = &fp->sf_uc;
263
264 /*
265 * XXX: the following code assumes that the constants for
266 * siginfo are the same between linux and NetBSD.
267 */
268 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
269
270 /* Save register context. */
271 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
272 sendsig_reset(l, sig);
273
274 mutex_exit(p->p_lock);
275 error = copyout(&frame, fp, sizeof(frame));
276 mutex_enter(p->p_lock);
277
278 if (error != 0) {
279 /*
280 * Process has trashed its stack; give it an illegal
281 * instruction to halt it in its tracks.
282 */
283 sigexit(l, SIGILL);
284 /* NOTREACHED */
285 }
286
287 /*
288 * Build context to run handler in.
289 */
290 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
291 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
292 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
293 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
294 (linux_rt_sigcode - linux_sigcode);
295 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
296 tf->tf_eflags &= ~PSL_CLEARSIG;
297 tf->tf_esp = (int)fp;
298 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
299
300 /* Remember that we're now on the signal stack. */
301 if (onstack)
302 sas->ss_flags |= SS_ONSTACK;
303 }
304
305 static void
linux_old_sendsig(const ksiginfo_t * ksi,const sigset_t * mask)306 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
307 {
308 struct lwp *l = curlwp;
309 struct proc *p = l->l_proc;
310 struct trapframe *tf;
311 struct linux_sigframe *fp, frame;
312 int onstack, error;
313 int sig = ksi->ksi_signo;
314 sig_t catcher = SIGACTION(p, sig).sa_handler;
315 stack_t *sas = &l->l_sigstk;
316
317 tf = l->l_md.md_regs;
318
319 /* Do we need to jump onto the signal stack? */
320 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
321 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
322
323 /* Allocate space for the signal handler context. */
324 if (onstack)
325 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
326 sas->ss_size);
327 else
328 fp = (struct linux_sigframe *)tf->tf_esp;
329 fp--;
330
331 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
332 onstack, fp, sig, tf->tf_eip,
333 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
334
335 memset(&frame, 0, sizeof(frame));
336
337 /* Build stack frame for signal trampoline. */
338 frame.sf_handler = catcher;
339 frame.sf_sig = native_to_linux_signo[sig];
340
341 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
342 sendsig_reset(l, sig);
343
344 mutex_exit(p->p_lock);
345 error = copyout(&frame, fp, sizeof(frame));
346 mutex_enter(p->p_lock);
347
348 if (error != 0) {
349 /*
350 * Process has trashed its stack; give it an illegal
351 * instruction to halt it in its tracks.
352 */
353 sigexit(l, SIGILL);
354 /* NOTREACHED */
355 }
356
357 /*
358 * Build context to run handler in.
359 */
360 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
361 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
362 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
363 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
364 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
365 tf->tf_eflags &= ~PSL_CLEARSIG;
366 tf->tf_esp = (int)fp;
367 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
368
369 /* Remember that we're now on the signal stack. */
370 if (onstack)
371 sas->ss_flags |= SS_ONSTACK;
372 }
373
374 /*
375 * System call to cleanup state after a signal
376 * has been taken. Reset signal mask and
377 * stack state from context left by sendsig (above).
378 * Return to previous pc and psl as specified by
379 * context left by sendsig. Check carefully to
380 * make sure that the user has not modified the
381 * psl to gain improper privileges or to cause
382 * a machine fault.
383 */
384 int
linux_sys_rt_sigreturn(struct lwp * l,const struct linux_sys_rt_sigreturn_args * uap,register_t * retval)385 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
386 {
387 /* {
388 syscallarg(struct linux_ucontext *) ucp;
389 } */
390 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
391 int error;
392
393 /*
394 * The trampoline code hands us the context.
395 * It is unsafe to keep track of it ourselves, in the event that a
396 * program jumps out of a signal handler.
397 */
398 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
399 return error;
400
401 /* XXX XAX we can do better here by using more of the ucontext */
402 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
403 }
404
405 int
linux_sys_sigreturn(struct lwp * l,const struct linux_sys_sigreturn_args * uap,register_t * retval)406 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
407 {
408 /* {
409 syscallarg(struct linux_sigcontext *) scp;
410 } */
411 struct linux_sigcontext context, *scp = SCARG(uap, scp);
412 int error;
413
414 /*
415 * The trampoline code hands us the context.
416 * It is unsafe to keep track of it ourselves, in the event that a
417 * program jumps out of a signal handler.
418 */
419 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
420 return error;
421 return linux_restore_sigcontext(l, &context, retval);
422 }
423
424 static int
linux_restore_sigcontext(struct lwp * l,struct linux_sigcontext * scp,register_t * retval)425 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
426 register_t *retval)
427 {
428 struct proc *p = l->l_proc;
429 stack_t *sas = &l->l_sigstk;
430 struct trapframe *tf;
431 sigset_t mask;
432 ssize_t ss_gap;
433
434 /* Restore register context. */
435 tf = l->l_md.md_regs;
436 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
437
438 /*
439 * Check for security violations. If we're returning to
440 * protected mode, the CPU will validate the segment registers
441 * automatically and generate a trap on violations. We handle
442 * the trap, rather than doing all of the checking here.
443 */
444 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
445 !USERMODE(scp->sc_cs))
446 return EINVAL;
447
448 tf->tf_gs = scp->sc_gs;
449 tf->tf_fs = scp->sc_fs;
450 tf->tf_es = scp->sc_es;
451 tf->tf_ds = scp->sc_ds;
452 tf->tf_eflags = scp->sc_eflags;
453
454 tf->tf_edi = scp->sc_edi;
455 tf->tf_esi = scp->sc_esi;
456 tf->tf_ebp = scp->sc_ebp;
457 tf->tf_ebx = scp->sc_ebx;
458 tf->tf_edx = scp->sc_edx;
459 tf->tf_ecx = scp->sc_ecx;
460 tf->tf_eax = scp->sc_eax;
461 tf->tf_eip = scp->sc_eip;
462 tf->tf_cs = scp->sc_cs;
463 tf->tf_esp = scp->sc_esp_at_signal;
464 tf->tf_ss = scp->sc_ss;
465
466 /* Restore signal stack. */
467 /*
468 * Linux really does it this way; it doesn't have space in sigframe
469 * to save the onstack flag.
470 */
471 mutex_enter(p->p_lock);
472 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
473 if (ss_gap >= 0 && ss_gap < sas->ss_size)
474 sas->ss_flags |= SS_ONSTACK;
475 else
476 sas->ss_flags &= ~SS_ONSTACK;
477
478 /* Restore signal mask. */
479 linux_old_to_native_sigset(&mask, &scp->sc_mask);
480 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
481 mutex_exit(p->p_lock);
482
483 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
484 return EJUSTRETURN;
485 }
486
487 #ifdef USER_LDT
488
489 static int
linux_read_ldt(struct lwp * l,const struct linux_sys_modify_ldt_args * uap,register_t * retval)490 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
491 register_t *retval)
492 {
493 struct x86_get_ldt_args gl;
494 int error;
495 union descriptor *ldt_buf;
496 size_t sz;
497
498 /*
499 * I've checked the linux code - this function is asymmetric with
500 * linux_write_ldt, and returns raw ldt entries.
501 * NB, the code I saw zerod the spare parts of the user buffer.
502 */
503
504 DPRINTF(("linux_read_ldt!"));
505
506 sz = 8192 * sizeof(*ldt_buf);
507 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
508 gl.start = 0;
509 gl.desc = NULL;
510 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
511 error = x86_get_ldt1(l, &gl, ldt_buf);
512 /* NB gl.num might have changed */
513 if (error == 0) {
514 *retval = gl.num * sizeof(*ldtstore);
515 error = copyout(ldt_buf, SCARG(uap, ptr),
516 gl.num * sizeof *ldt_buf);
517 }
518 kmem_free(ldt_buf, sz);
519
520 return error;
521 }
522
523 struct linux_ldt_info {
524 u_int entry_number;
525 u_long base_addr;
526 u_int limit;
527 u_int seg_32bit:1;
528 u_int contents:2;
529 u_int read_exec_only:1;
530 u_int limit_in_pages:1;
531 u_int seg_not_present:1;
532 u_int useable:1;
533 };
534
535 static int
linux_write_ldt(struct lwp * l,const struct linux_sys_modify_ldt_args * uap,int oldmode)536 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
537 int oldmode)
538 {
539 struct linux_ldt_info ldt_info;
540 union descriptor d;
541 struct x86_set_ldt_args sl;
542 int error;
543
544 DPRINTF(("linux_write_ldt %d\n", oldmode));
545 if (SCARG(uap, bytecount) != sizeof(ldt_info))
546 return (EINVAL);
547 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
548 return error;
549 if (ldt_info.entry_number >= 8192)
550 return (EINVAL);
551 if (ldt_info.contents == 3) {
552 if (oldmode)
553 return (EINVAL);
554 if (ldt_info.seg_not_present)
555 return (EINVAL);
556 }
557
558 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
559 (oldmode || (ldt_info.contents == 0 &&
560 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
561 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
562 ldt_info.useable == 0))) {
563 /* this means you should zero the ldt */
564 (void)memset(&d, 0, sizeof(d));
565 } else {
566 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
567 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
568 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
569 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
570 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
571 (!ldt_info.read_exec_only << 1);
572 d.sd.sd_dpl = SEL_UPL;
573 d.sd.sd_p = !ldt_info.seg_not_present;
574 d.sd.sd_def32 = ldt_info.seg_32bit;
575 d.sd.sd_gran = ldt_info.limit_in_pages;
576 if (!oldmode)
577 d.sd.sd_xx = ldt_info.useable;
578 else
579 d.sd.sd_xx = 0;
580 }
581 sl.start = ldt_info.entry_number;
582 sl.desc = NULL;
583 sl.num = 1;
584
585 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
586 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
587
588 return x86_set_ldt1(l, &sl, &d);
589 }
590
591 #endif /* USER_LDT */
592
593 int
linux_sys_modify_ldt(struct lwp * l,const struct linux_sys_modify_ldt_args * uap,register_t * retval)594 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
595 {
596 /* {
597 syscallarg(int) func;
598 syscallarg(void *) ptr;
599 syscallarg(size_t) bytecount;
600 } */
601
602 switch (SCARG(uap, func)) {
603 #ifdef USER_LDT
604 case 0:
605 return linux_read_ldt(l, (const void *)uap, retval);
606 case 1:
607 return linux_write_ldt(l, (const void *)uap, 1);
608 case 2:
609 #ifdef notyet
610 return linux_read_default_ldt(l, (const void *)uap, retval);
611 #else
612 return (ENOSYS);
613 #endif
614 case 0x11:
615 return linux_write_ldt(l, (const void *)uap, 0);
616 #endif /* USER_LDT */
617
618 default:
619 return (ENOSYS);
620 }
621 }
622
623 /*
624 * XXX Pathetic hack to make svgalib work. This will fake the major
625 * device number of an opened VT so that svgalib likes it. grmbl.
626 * Should probably do it 'wrong the right way' and use a mapping
627 * array for all major device numbers, and map linux_mknod too.
628 */
629 dev_t
linux_fakedev(dev_t dev,int raw)630 linux_fakedev(dev_t dev, int raw)
631 {
632 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
633 const struct cdevsw *cd = cdevsw_lookup(dev);
634
635 if (raw) {
636 #if (NWSDISPLAY > 0)
637 extern const struct cdevsw wsdisplay_cdevsw;
638 if (cd == &wsdisplay_cdevsw)
639 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
640 #endif
641 }
642
643 if (cd == &ptc_cdevsw)
644 return makedev(LINUX_PTC_MAJOR, minor(dev));
645 if (cd == &pts_cdevsw)
646 return makedev(LINUX_PTS_MAJOR, minor(dev));
647
648 return dev;
649 }
650
#if (NWSDISPLAY > 0)
/*
 * Fixed keyboard translation tables handed back by the LINUX_KDGKBENT
 * ioctl.  That's not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries and is indexed by the kb_index the
 * caller supplies; linux_keytabs[] is indexed by kb_table.
 */
#define NR_KEYS 128

/* Table 0. */
static const u_short plain_map[NR_KEYS] = {
	0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Table 1. */
static const u_short shift_map[NR_KEYS] = {
	0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Tables 2 and 3 (the same map is listed twice in linux_keytabs). */
static const u_short altgr_map[NR_KEYS] = {
	0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Table 4. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Table selector; the entry count bounds the kb_table values accepted. */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
730
731 static struct biosdisk_info *
fd2biosinfo(struct proc * p,struct file * fp)732 fd2biosinfo(struct proc *p, struct file *fp)
733 {
734 struct vnode *vp;
735 const char *blkname;
736 char diskname[16];
737 int i;
738 struct nativedisk_info *nip;
739 struct disklist *dl = x86_alldisks;
740
741 if (dl == NULL)
742 return NULL;
743 if (fp->f_type != DTYPE_VNODE)
744 return NULL;
745 vp = (struct vnode *)fp->f_data;
746
747 if (vp->v_type != VBLK)
748 return NULL;
749
750 blkname = devsw_blk2name(major(vp->v_rdev));
751 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
752 (unsigned long long)DISKUNIT(vp->v_rdev));
753
754 for (i = 0; i < dl->dl_nnativedisks; i++) {
755 nip = &dl->dl_nativedisks[i];
756 if (strcmp(diskname, nip->ni_devname))
757 continue;
758 if (nip->ni_nmatches != 0)
759 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
760 }
761
762 return NULL;
763 }
764
765
766 /*
767 * We come here in a last attempt to satisfy a Linux ioctl() call
768 */
769 int
linux_machdepioctl(struct lwp * l,const struct linux_sys_ioctl_args * uap,register_t * retval)770 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
771 {
772 /* {
773 syscallarg(int) fd;
774 syscallarg(u_long) com;
775 syscallarg(void *) data;
776 } */
777 struct sys_ioctl_args bia;
778 u_long com;
779 int error, error1;
780 #if (NWSDISPLAY > 0)
781 struct vt_mode lvt;
782 struct kbentry kbe;
783 #endif
784 struct linux_hd_geometry hdg;
785 struct linux_hd_big_geometry hdg_big;
786 struct biosdisk_info *bip;
787 file_t *fp;
788 int fd;
789 struct disklabel label;
790 struct partinfo partp;
791 int (*ioctlf)(struct file *, u_long, void *);
792 u_long start, biostotal, realtotal;
793 u_char heads, sectors;
794 u_int cylinders;
795 struct ioctl_pt pt;
796
797 fd = SCARG(uap, fd);
798 SCARG(&bia, fd) = fd;
799 SCARG(&bia, data) = SCARG(uap, data);
800 com = SCARG(uap, com);
801
802 if ((fp = fd_getfile(fd)) == NULL)
803 return (EBADF);
804
805 switch (com) {
806 #if (NWSDISPLAY > 0)
807 case LINUX_KDGKBMODE:
808 com = KDGKBMODE;
809 break;
810 case LINUX_KDSKBMODE:
811 com = KDSKBMODE;
812 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
813 SCARG(&bia, data) = (void *)K_RAW;
814 break;
815 case LINUX_KIOCSOUND:
816 SCARG(&bia, data) =
817 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
818 /* fall through */
819 case LINUX_KDMKTONE:
820 com = KDMKTONE;
821 break;
822 case LINUX_KDSETMODE:
823 com = KDSETMODE;
824 break;
825 case LINUX_KDGETMODE:
826 /* KD_* values are equal to the wscons numbers */
827 com = WSDISPLAYIO_GMODE;
828 break;
829 case LINUX_KDENABIO:
830 com = KDENABIO;
831 break;
832 case LINUX_KDDISABIO:
833 com = KDDISABIO;
834 break;
835 case LINUX_KDGETLED:
836 com = KDGETLED;
837 break;
838 case LINUX_KDSETLED:
839 com = KDSETLED;
840 break;
841 case LINUX_VT_OPENQRY:
842 com = VT_OPENQRY;
843 break;
844 case LINUX_VT_GETMODE:
845 memset(&lvt, 0, sizeof(lvt));
846 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
847 if (error != 0)
848 goto out;
849 lvt.relsig = native_to_linux_signo[lvt.relsig];
850 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
851 lvt.frsig = native_to_linux_signo[lvt.frsig];
852 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
853 goto out;
854 case LINUX_VT_SETMODE:
855 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
856 if (error != 0)
857 goto out;
858 lvt.relsig = linux_to_native_signo[lvt.relsig];
859 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
860 lvt.frsig = linux_to_native_signo[lvt.frsig];
861 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
862 goto out;
863 case LINUX_VT_DISALLOCATE:
864 /* XXX should use WSDISPLAYIO_DELSCREEN */
865 error = 0;
866 goto out;
867 case LINUX_VT_RELDISP:
868 com = VT_RELDISP;
869 break;
870 case LINUX_VT_ACTIVATE:
871 com = VT_ACTIVATE;
872 break;
873 case LINUX_VT_WAITACTIVE:
874 com = VT_WAITACTIVE;
875 break;
876 case LINUX_VT_GETSTATE:
877 com = VT_GETSTATE;
878 break;
879 case LINUX_KDGKBTYPE:
880 {
881 static const u_int8_t kb101 = KB_101;
882
883 /* This is what Linux does. */
884 error = copyout(&kb101, SCARG(uap, data), 1);
885 goto out;
886 }
887 case LINUX_KDGKBENT:
888 /*
889 * The Linux KDGKBENT ioctl is different from the
890 * SYSV original. So we handle it in machdep code.
891 * XXX We should use keyboard mapping information
892 * from wsdisplay, but this would be expensive.
893 */
894 if ((error = copyin(SCARG(uap, data), &kbe,
895 sizeof(struct kbentry))))
896 goto out;
897 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
898 || kbe.kb_index >= NR_KEYS) {
899 error = EINVAL;
900 goto out;
901 }
902 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
903 error = copyout(&kbe, SCARG(uap, data),
904 sizeof(struct kbentry));
905 goto out;
906 #endif
907 case LINUX_HDIO_GETGEO:
908 case LINUX_HDIO_GETGEO_BIG:
909 /*
910 * Try to mimic Linux behaviour: return the BIOS geometry
911 * if possible (extending its # of cylinders if it's beyond
912 * the 1023 limit), fall back to the MI geometry (i.e.
913 * the real geometry) if not found, by returning an
914 * error. See common/linux_hdio.c
915 */
916 bip = fd2biosinfo(curproc, fp);
917 ioctlf = fp->f_ops->fo_ioctl;
918 error = ioctlf(fp, DIOCGDINFO, (void *)&label);
919 error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp);
920 if (error != 0 && error1 != 0) {
921 error = error1;
922 goto out;
923 }
924 start = error1 != 0 ? partp.pi_offset : 0;
925 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
926 && bip->bi_cyl != 0) {
927 heads = bip->bi_head;
928 sectors = bip->bi_sec;
929 cylinders = bip->bi_cyl;
930 biostotal = heads * sectors * cylinders;
931 realtotal = label.d_ntracks * label.d_nsectors *
932 label.d_ncylinders;
933 if (realtotal > biostotal)
934 cylinders = realtotal / (heads * sectors);
935 } else {
936 heads = label.d_ntracks;
937 cylinders = label.d_ncylinders;
938 sectors = label.d_nsectors;
939 }
940 if (com == LINUX_HDIO_GETGEO) {
941 memset(&hdg, 0, sizeof(hdg));
942 hdg.start = start;
943 hdg.heads = heads;
944 hdg.cylinders = cylinders;
945 hdg.sectors = sectors;
946 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
947 goto out;
948 } else {
949 memset(&hdg_big, 0, sizeof(hdg_big));
950 hdg_big.start = start;
951 hdg_big.heads = heads;
952 hdg_big.cylinders = cylinders;
953 hdg_big.sectors = sectors;
954 error = copyout(&hdg_big, SCARG(uap, data),
955 sizeof hdg_big);
956 goto out;
957 }
958
959 default:
960 /*
961 * Unknown to us. If it's on a device, just pass it through
962 * using PTIOCLINUX, the device itself might be able to
963 * make some sense of it.
964 * XXX hack: if the function returns EJUSTRETURN,
965 * it has stuffed a sysctl return value in pt.data.
966 */
967 ioctlf = fp->f_ops->fo_ioctl;
968 pt.com = SCARG(uap, com);
969 pt.data = SCARG(uap, data);
970 error = ioctlf(fp, PTIOCLINUX, &pt);
971 if (error == EJUSTRETURN) {
972 retval[0] = (register_t)pt.data;
973 error = 0;
974 }
975
976 if (error == ENOTTY) {
977 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
978 com));
979 }
980 goto out;
981 }
982 SCARG(&bia, com) = com;
983 error = sys_ioctl(curlwp, &bia, retval);
984 out:
985 fd_putfile(fd);
986 return error;
987 }
988
989 /*
990 * Set I/O permissions for a process. Just set the maximum level
991 * right away (ignoring the argument), otherwise we would have
992 * to rely on I/O permission maps, which are not implemented.
993 */
994 int
linux_sys_iopl(struct lwp * l,const struct linux_sys_iopl_args * uap,register_t * retval)995 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
996 {
997 /* {
998 syscallarg(int) level;
999 } */
1000 struct trapframe *fp = l->l_md.md_regs;
1001
1002 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1003 NULL, NULL, NULL, NULL) != 0)
1004 return EPERM;
1005 fp->tf_eflags |= PSL_IOPL;
1006 *retval = 0;
1007 return 0;
1008 }
1009
1010 /*
1011 * See above. If a root process tries to set access to an I/O port,
1012 * just let it have the whole range.
1013 */
1014 int
linux_sys_ioperm(struct lwp * l,const struct linux_sys_ioperm_args * uap,register_t * retval)1015 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1016 {
1017 /* {
1018 syscallarg(unsigned int) lo;
1019 syscallarg(unsigned int) hi;
1020 syscallarg(int) val;
1021 } */
1022 struct trapframe *fp = l->l_md.md_regs;
1023
1024 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1025 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1026 NULL, NULL) != 0)
1027 return EPERM;
1028 if (SCARG(uap, val))
1029 fp->tf_eflags |= PSL_IOPL;
1030 *retval = 0;
1031 return 0;
1032 }
1033
1034 int
linux_usertrap(struct lwp * l,vaddr_t trapaddr,void * arg)1035 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1036 void *arg)
1037 {
1038 return 0;
1039 }
1040
1041 const char *
linux_get_uname_arch(void)1042 linux_get_uname_arch(void)
1043 {
1044 static char uname_arch[5] = "i386";
1045
1046 if (uname_arch[1] == '3')
1047 uname_arch[1] += cpu_class;
1048 return uname_arch;
1049 }
1050