xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 4bfc10355ca5ccd94d950ad6f7092be3470193fa)
1 /*	$NetBSD: linux_machdep.c,v 1.143 2009/03/21 14:41:30 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden, and by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.143 2009/03/21 14:41:30 ad Exp $");
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/user.h>
46 #include <sys/buf.h>
47 #include <sys/reboot.h>
48 #include <sys/conf.h>
49 #include <sys/exec.h>
50 #include <sys/file.h>
51 #include <sys/callout.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 #include <sys/msgbuf.h>
55 #include <sys/mount.h>
56 #include <sys/vnode.h>
57 #include <sys/device.h>
58 #include <sys/syscallargs.h>
59 #include <sys/filedesc.h>
60 #include <sys/exec_elf.h>
61 #include <sys/disklabel.h>
62 #include <sys/ioctl.h>
63 #include <sys/wait.h>
64 #include <sys/kauth.h>
65 #include <sys/kmem.h>
66 
67 #include <miscfs/specfs/specdev.h>
68 
69 #include <compat/linux/common/linux_types.h>
70 #include <compat/linux/common/linux_signal.h>
71 #include <compat/linux/common/linux_util.h>
72 #include <compat/linux/common/linux_ioctl.h>
73 #include <compat/linux/common/linux_hdio.h>
74 #include <compat/linux/common/linux_exec.h>
75 #include <compat/linux/common/linux_machdep.h>
76 #include <compat/linux/common/linux_errno.h>
77 
78 #include <compat/linux/linux_syscallargs.h>
79 
80 #include <sys/cpu.h>
81 #include <machine/cpufunc.h>
82 #include <machine/psl.h>
83 #include <machine/reg.h>
84 #include <machine/segments.h>
85 #include <machine/specialreg.h>
86 #include <machine/sysarch.h>
87 #include <machine/vm86.h>
88 #include <machine/vmparam.h>
89 
90 /*
91  * To see whether wscons is configured (for virtual console ioctl calls).
92  */
93 #if defined(_KERNEL_OPT)
94 #include "wsdisplay.h"
95 #endif
96 #if (NWSDISPLAY > 0)
97 #include <dev/wscons/wsconsio.h>
98 #include <dev/wscons/wsdisplay_usl_io.h>
99 #if defined(_KERNEL_OPT)
100 #include "opt_xserver.h"
101 #endif
102 #endif
103 
104 #ifdef DEBUG_LINUX
105 #define DPRINTF(a) uprintf a
106 #else
107 #define DPRINTF(a)
108 #endif
109 
110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
111 extern struct disklist *x86_alldisks;
112 static void linux_save_ucontext(struct lwp *, struct trapframe *,
113     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
114 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
115     const sigset_t *, struct linux_sigcontext *);
116 static int linux_restore_sigcontext(struct lwp *,
117     struct linux_sigcontext *, register_t *);
118 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
119 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
120 
121 extern char linux_sigcode[], linux_rt_sigcode[];
122 /*
123  * Deal with some i386-specific things in the Linux emulation code.
124  */
125 
126 void
127 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
128 {
129 	struct pcb *pcb = &l->l_addr->u_pcb;
130 	struct trapframe *tf;
131 
132 #if NNPX > 0
133 	/* If we were using the FPU, forget about it. */
134 	if (npxproc == l)
135 		npxdrop();
136 #endif
137 
138 #ifdef USER_LDT
139 	pmap_ldt_cleanup(l);
140 #endif
141 
142 	l->l_md.md_flags &= ~MDL_USEDFPU;
143 
144 	if (i386_use_fxsave) {
145 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
146 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
147 	} else
148 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
149 
150 	tf = l->l_md.md_regs;
151 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
152 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
153 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
154 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
155 	tf->tf_edi = 0;
156 	tf->tf_esi = 0;
157 	tf->tf_ebp = 0;
158 	tf->tf_ebx = (int)l->l_proc->p_psstr;
159 	tf->tf_edx = 0;
160 	tf->tf_ecx = 0;
161 	tf->tf_eax = 0;
162 	tf->tf_eip = epp->ep_entry;
163 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
164 	tf->tf_eflags = PSL_USERSET;
165 	tf->tf_esp = stack;
166 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
167 }
168 
169 /*
170  * Send an interrupt to process.
171  *
172  * Stack is set up to allow sigcode stored
173  * in u. to call routine, followed by kcall
174  * to sigreturn routine below.  After sigreturn
175  * resets the signal mask, the stack, and the
176  * frame pointer, it returns to the user
177  * specified pc, psl.
178  */
179 
180 void
181 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
182 {
183 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
184 		linux_rt_sendsig(ksi, mask);
185 	else
186 		linux_old_sendsig(ksi, mask);
187 }
188 
189 
190 static void
191 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
192 {
193 	uc->uc_flags = 0;
194 	uc->uc_link = NULL;
195 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
196 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
197 	native_to_linux_sigset(&uc->uc_sigmask, mask);
198 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
199 }
200 
201 static void
202 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
203 {
204 	/* Save register context. */
205 #ifdef VM86
206 	if (tf->tf_eflags & PSL_VM) {
207 		sc->sc_gs = tf->tf_vm86_gs;
208 		sc->sc_fs = tf->tf_vm86_fs;
209 		sc->sc_es = tf->tf_vm86_es;
210 		sc->sc_ds = tf->tf_vm86_ds;
211 		sc->sc_eflags = get_vflags(l);
212 	} else
213 #endif
214 	{
215 		sc->sc_gs = tf->tf_gs;
216 		sc->sc_fs = tf->tf_fs;
217 		sc->sc_es = tf->tf_es;
218 		sc->sc_ds = tf->tf_ds;
219 		sc->sc_eflags = tf->tf_eflags;
220 	}
221 	sc->sc_edi = tf->tf_edi;
222 	sc->sc_esi = tf->tf_esi;
223 	sc->sc_esp = tf->tf_esp;
224 	sc->sc_ebp = tf->tf_ebp;
225 	sc->sc_ebx = tf->tf_ebx;
226 	sc->sc_edx = tf->tf_edx;
227 	sc->sc_ecx = tf->tf_ecx;
228 	sc->sc_eax = tf->tf_eax;
229 	sc->sc_eip = tf->tf_eip;
230 	sc->sc_cs = tf->tf_cs;
231 	sc->sc_esp_at_signal = tf->tf_esp;
232 	sc->sc_ss = tf->tf_ss;
233 	sc->sc_err = tf->tf_err;
234 	sc->sc_trapno = tf->tf_trapno;
235 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
236 	sc->sc_387 = NULL;
237 
238 	/* Save signal stack. */
239 	/* Linux doesn't save the onstack flag in sigframe */
240 
241 	/* Save signal mask. */
242 	native_to_linux_old_sigset(&sc->sc_mask, mask);
243 }
244 
/*
 * Deliver a signal using the Linux "rt" frame (used by linux_sendsig
 * when the handler was installed with SA_SIGINFO).
 *
 * A struct linux_rt_sigframe holding the handler address, the Linux
 * signal number, a Linux siginfo and a Linux ucontext is built in
 * kernel memory and copied out just below the chosen user stack top
 * (the alternate signal stack if requested and usable, otherwise the
 * current %esp).  The trapframe is then redirected to the rt signal
 * trampoline.  If the frame cannot be copied out the process is killed
 * with SIGILL.
 */
static void
linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct trapframe *tf;
	struct linux_rt_sigframe *fp, frame;
	int onstack, error;
	linux_siginfo_t *lsi;
	int sig = ksi->ksi_signo;
	/* Handler is read here, before sendsig_reset() is called below. */
	sig_t catcher = SIGACTION(p, sig).sa_handler;
	struct sigaltstack *sas = &l->l_sigstk;

	tf = l->l_md.md_regs;
	/* Do we need to jump onto the signal stack? */
	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;


	/* Allocate space for the signal handler context. */
	if (onstack)
		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
		    sas->ss_size);
	else
		fp = (struct linux_rt_sigframe *)tf->tf_esp;
	/* Step down one frame so fp is the user address of the new frame. */
	fp--;

	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));

	/* Build stack frame for signal trampoline. */
	frame.sf_handler = catcher;
	frame.sf_sig = native_to_linux_signo[sig];
	/* These are user addresses inside the frame about to be copied out. */
	frame.sf_sip = &fp->sf_si;
	frame.sf_ucp = &fp->sf_uc;

	/*
	 * XXX: the following code assumes that the constants for
	 * siginfo are the same between linux and NetBSD.
	 */
	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
	lsi->lsi_code = ksi->ksi_code;
	/* Which siginfo members are filled in depends on the Linux signal. */
	switch (lsi->lsi_signo = frame.sf_sig) {
	case LINUX_SIGILL:
	case LINUX_SIGFPE:
	case LINUX_SIGSEGV:
	case LINUX_SIGBUS:
	case LINUX_SIGTRAP:
		lsi->lsi_addr = ksi->ksi_addr;
		break;
	case LINUX_SIGCHLD:
		lsi->lsi_uid = ksi->ksi_uid;
		lsi->lsi_pid = ksi->ksi_pid;
		lsi->lsi_utime = ksi->ksi_utime;
		lsi->lsi_stime = ksi->ksi_stime;
		/* XXX is that right? */
		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
		break;
	case LINUX_SIGIO:
		lsi->lsi_band = ksi->ksi_band;
		lsi->lsi_fd = ksi->ksi_fd;
		break;
	default:
		lsi->lsi_uid = ksi->ksi_uid;
		lsi->lsi_pid = ksi->ksi_pid;
		if (lsi->lsi_signo == LINUX_SIGALRM ||
		    lsi->lsi_signo >= LINUX_SIGRTMIN)
			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
		break;
	}

	/* Save register context. */
	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
	sendsig_reset(l, sig);

	/* p_lock is released for the duration of the copyout. */
	mutex_exit(p->p_lock);
	error = copyout(&frame, fp, sizeof(frame));
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	/*
	 * Build context to run handler in.
	 */
	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
	/*
	 * Enter at the rt trampoline: the process's sigcode address plus
	 * the offset of linux_rt_sigcode from linux_sigcode.
	 */
	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
	    (linux_rt_sigcode - linux_sigcode);
	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
	tf->tf_eflags &= ~PSL_CLEARSIG;
	tf->tf_esp = (int)fp;
	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);

	/* Remember that we're now on the signal stack. */
	if (onstack)
		sas->ss_flags |= SS_ONSTACK;
}
352 
353 static void
354 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
355 {
356 	struct lwp *l = curlwp;
357 	struct proc *p = l->l_proc;
358 	struct trapframe *tf;
359 	struct linux_sigframe *fp, frame;
360 	int onstack, error;
361 	int sig = ksi->ksi_signo;
362 	sig_t catcher = SIGACTION(p, sig).sa_handler;
363 	struct sigaltstack *sas = &l->l_sigstk;
364 
365 	tf = l->l_md.md_regs;
366 
367 	/* Do we need to jump onto the signal stack? */
368 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
369 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
370 
371 	/* Allocate space for the signal handler context. */
372 	if (onstack)
373 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
374 		    sas->ss_size);
375 	else
376 		fp = (struct linux_sigframe *)tf->tf_esp;
377 	fp--;
378 
379 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
380 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
381 
382 	/* Build stack frame for signal trampoline. */
383 	frame.sf_handler = catcher;
384 	frame.sf_sig = native_to_linux_signo[sig];
385 
386 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
387 	sendsig_reset(l, sig);
388 
389 	mutex_exit(p->p_lock);
390 	error = copyout(&frame, fp, sizeof(frame));
391 	mutex_enter(p->p_lock);
392 
393 	if (error != 0) {
394 		/*
395 		 * Process has trashed its stack; give it an illegal
396 		 * instruction to halt it in its tracks.
397 		 */
398 		sigexit(l, SIGILL);
399 		/* NOTREACHED */
400 	}
401 
402 	/*
403 	 * Build context to run handler in.
404 	 */
405 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
406 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
407 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
408 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
409 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
410 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
411 	tf->tf_eflags &= ~PSL_CLEARSIG;
412 	tf->tf_esp = (int)fp;
413 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
414 
415 	/* Remember that we're now on the signal stack. */
416 	if (onstack)
417 		sas->ss_flags |= SS_ONSTACK;
418 }
419 
420 /*
421  * System call to cleanup state after a signal
422  * has been taken.  Reset signal mask and
423  * stack state from context left by sendsig (above).
424  * Return to previous pc and psl as specified by
425  * context left by sendsig. Check carefully to
426  * make sure that the user has not modified the
427  * psl to gain improper privileges or to cause
428  * a machine fault.
429  */
430 int
431 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
432 {
433 	/* {
434 		syscallarg(struct linux_ucontext *) ucp;
435 	} */
436 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
437 	int error;
438 
439 	/*
440 	 * The trampoline code hands us the context.
441 	 * It is unsafe to keep track of it ourselves, in the event that a
442 	 * program jumps out of a signal handler.
443 	 */
444 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
445 		return error;
446 
447 	/* XXX XAX we can do better here by using more of the ucontext */
448 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
449 }
450 
451 int
452 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
453 {
454 	/* {
455 		syscallarg(struct linux_sigcontext *) scp;
456 	} */
457 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
458 	int error;
459 
460 	/*
461 	 * The trampoline code hands us the context.
462 	 * It is unsafe to keep track of it ourselves, in the event that a
463 	 * program jumps out of a signal handler.
464 	 */
465 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
466 		return error;
467 	return linux_restore_sigcontext(l, &context, retval);
468 }
469 
/*
 * Restore the lwp's user register state, signal-stack flag and signal
 * mask from a Linux sigcontext that has already been copied into kernel
 * memory (scp).
 *
 * Returns EINVAL if the context tries to change static eflags bits or
 * would not resume in user mode; otherwise EJUSTRETURN.  retval is not
 * touched.
 */
static int
linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    register_t *retval)
{
	struct proc *p = l->l_proc;
	struct sigaltstack *sas = &l->l_sigstk;
	struct trapframe *tf;
	sigset_t mask;
	ssize_t ss_gap;
	/* Restore register context. */
	tf = l->l_md.md_regs;

	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
#ifdef VM86
	if (scp->sc_eflags & PSL_VM) {
		void syscall_vm86(struct trapframe *);

		/* Returning into VM86 mode: load the VM86 segment view. */
		tf->tf_vm86_gs = scp->sc_gs;
		tf->tf_vm86_fs = scp->sc_fs;
		tf->tf_vm86_es = scp->sc_es;
		tf->tf_vm86_ds = scp->sc_ds;
		set_vflags(l, scp->sc_eflags);
		p->p_md.md_syscall = syscall_vm86;
	} else
#endif
	{
		/*
		 * Check for security violations.  If we're returning to
		 * protected mode, the CPU will validate the segment registers
		 * automatically and generate a trap on violations.  We handle
		 * the trap, rather than doing all of the checking here.
		 */
		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
		    !USERMODE(scp->sc_cs, scp->sc_eflags))
			return EINVAL;

		tf->tf_gs = scp->sc_gs;
		tf->tf_fs = scp->sc_fs;
		tf->tf_es = scp->sc_es;
		tf->tf_ds = scp->sc_ds;
#ifdef VM86
		/*
		 * The trapframe still has PSL_VM set but the new context
		 * does not: call the emulation's e_syscall_intern hook.
		 */
		if (tf->tf_eflags & PSL_VM)
			(*p->p_emul->e_syscall_intern)(p);
#endif
		tf->tf_eflags = scp->sc_eflags;
	}
	tf->tf_edi = scp->sc_edi;
	tf->tf_esi = scp->sc_esi;
	tf->tf_ebp = scp->sc_ebp;
	tf->tf_ebx = scp->sc_ebx;
	tf->tf_edx = scp->sc_edx;
	tf->tf_ecx = scp->sc_ecx;
	tf->tf_eax = scp->sc_eax;
	tf->tf_eip = scp->sc_eip;
	tf->tf_cs = scp->sc_cs;
	/* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
	tf->tf_esp = scp->sc_esp_at_signal;
	tf->tf_ss = scp->sc_ss;

	/* Restore signal stack. */
	/*
	 * Linux really does it this way; it doesn't have space in sigframe
	 * to save the onstack flag.
	 */
	mutex_enter(p->p_lock);
	/*
	 * The lwp counts as on the alternate stack exactly when the
	 * restored stack pointer lies within [ss_sp, ss_sp + ss_size).
	 */
	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
	if (ss_gap >= 0 && ss_gap < sas->ss_size)
		sas->ss_flags |= SS_ONSTACK;
	else
		sas->ss_flags &= ~SS_ONSTACK;

	/* Restore signal mask. */
	linux_old_to_native_sigset(&mask, &scp->sc_mask);
	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
	mutex_exit(p->p_lock);

	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
	/* Registers were loaded directly into the trapframe. */
	return EJUSTRETURN;
}
548 
549 #ifdef USER_LDT
550 
551 static int
552 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
553     register_t *retval)
554 {
555 	struct x86_get_ldt_args gl;
556 	int error;
557 	union descriptor *ldt_buf;
558 	size_t sz;
559 
560 	/*
561 	 * I've checked the linux code - this function is asymetric with
562 	 * linux_write_ldt, and returns raw ldt entries.
563 	 * NB, the code I saw zerod the spare parts of the user buffer.
564 	 */
565 
566 	DPRINTF(("linux_read_ldt!"));
567 
568 	sz = 8192 * sizeof(*ldt_buf);
569 	ldt_buf = kmem_zalloc(sz, KM_SLEEP);
570 	gl.start = 0;
571 	gl.desc = NULL;
572 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
573 	error = x86_get_ldt1(l, &gl, ldt_buf);
574 	/* NB gl.num might have changed */
575 	if (error == 0) {
576 		*retval = gl.num * sizeof *ldt;
577 		error = copyout(ldt_buf, SCARG(uap, ptr),
578 		    gl.num * sizeof *ldt_buf);
579 	}
580 	kmem_free(ldt_buf, sz);
581 
582 	return error;
583 }
584 
/*
 * Argument structure copied in from user space by linux_write_ldt().
 * The notes below describe how linux_write_ldt() uses each member when
 * it builds the segment descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set; must be < 8192 */
	u_long base_addr;		/* segment base (sd_lobase/sd_hibase) */
	u_int limit;			/* segment limit (sd_lolimit/sd_hilimit) */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless oldmode */
};
596 
597 static int
598 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
599     int oldmode)
600 {
601 	struct linux_ldt_info ldt_info;
602 	union descriptor d;
603 	struct x86_set_ldt_args sl;
604 	int error;
605 
606 	DPRINTF(("linux_write_ldt %d\n", oldmode));
607 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
608 		return (EINVAL);
609 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
610 		return error;
611 	if (ldt_info.entry_number >= 8192)
612 		return (EINVAL);
613 	if (ldt_info.contents == 3) {
614 		if (oldmode)
615 			return (EINVAL);
616 		if (ldt_info.seg_not_present)
617 			return (EINVAL);
618 	}
619 
620 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
621 	    (oldmode || (ldt_info.contents == 0 &&
622 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
623 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
624 	    ldt_info.useable == 0))) {
625 		/* this means you should zero the ldt */
626 		(void)memset(&d, 0, sizeof(d));
627 	} else {
628 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
629 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
630 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
631 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
632 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
633 		    (!ldt_info.read_exec_only << 1);
634 		d.sd.sd_dpl = SEL_UPL;
635 		d.sd.sd_p = !ldt_info.seg_not_present;
636 		d.sd.sd_def32 = ldt_info.seg_32bit;
637 		d.sd.sd_gran = ldt_info.limit_in_pages;
638 		if (!oldmode)
639 			d.sd.sd_xx = ldt_info.useable;
640 		else
641 			d.sd.sd_xx = 0;
642 	}
643 	sl.start = ldt_info.entry_number;
644 	sl.desc = NULL;;
645 	sl.num = 1;
646 
647 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
648 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
649 
650 	return x86_set_ldt1(l, &sl, &d);
651 }
652 
653 #endif /* USER_LDT */
654 
655 int
656 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
657 {
658 	/* {
659 		syscallarg(int) func;
660 		syscallarg(void *) ptr;
661 		syscallarg(size_t) bytecount;
662 	} */
663 
664 	switch (SCARG(uap, func)) {
665 #ifdef USER_LDT
666 	case 0:
667 		return linux_read_ldt(l, (const void *)uap, retval);
668 	case 1:
669 		return linux_write_ldt(l, (const void *)uap, 1);
670 	case 2:
671 #ifdef notyet
672 		return (linux_read_default_ldt(l, (const void *)uap, retval);
673 #else
674 		return (ENOSYS);
675 #endif
676 	case 0x11:
677 		return linux_write_ldt(l, (const void *)uap, 0);
678 #endif /* USER_LDT */
679 
680 	default:
681 		return (ENOSYS);
682 	}
683 }
684 
685 /*
686  * XXX Pathetic hack to make svgalib work. This will fake the major
687  * device number of an opened VT so that svgalib likes it. grmbl.
688  * Should probably do it 'wrong the right way' and use a mapping
689  * array for all major device numbers, and map linux_mknod too.
690  */
691 dev_t
692 linux_fakedev(dev_t dev, int raw)
693 {
694 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
695 	const struct cdevsw *cd = cdevsw_lookup(dev);
696 
697 	if (raw) {
698 #if (NWSDISPLAY > 0)
699 		extern const struct cdevsw wsdisplay_cdevsw;
700 		if (cd == &wsdisplay_cdevsw)
701 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
702 #endif
703 	}
704 
705 	if (cd == &ptc_cdevsw)
706 		return makedev(LINUX_PTC_MAJOR, minor(dev));
707 	if (cd == &pts_cdevsw)
708 		return makedev(LINUX_PTS_MAJOR, minor(dev));
709 
710 	return dev;
711 }
712 
713 #if (NWSDISPLAY > 0)
714 /*
715  * That's not complete, but enough to get an X server running.
716  */
/* Number of entries in each key map below. */
#define NR_KEYS 128
/*
 * Fixed key maps handed back by the LINUX_KDGKBENT ioctl through
 * linux_keytabs[]: plain_map, shift_map, altgr_map and ctrl_map, each
 * indexed by key index (0 .. NR_KEYS - 1).
 * NOTE(review): the values look like Linux keysym encodings (type in
 * the high byte, value in the low byte) -- confirm against the Linux
 * default keymap.
 */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
787 
/*
 * Key maps selectable by kb_table in the LINUX_KDGKBENT ioctl; the
 * table index picks the map and kb_index picks the entry within it.
 * altgr_map is listed twice, so table indices 2 and 3 return the same
 * data.
 * NOTE(review): the ordering presumably follows Linux's modifier table
 * numbering -- confirm.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
791 #endif
792 
793 static struct biosdisk_info *
794 fd2biosinfo(struct proc *p, struct file *fp)
795 {
796 	struct vnode *vp;
797 	const char *blkname;
798 	char diskname[16];
799 	int i;
800 	struct nativedisk_info *nip;
801 	struct disklist *dl = x86_alldisks;
802 
803 	if (fp->f_type != DTYPE_VNODE)
804 		return NULL;
805 	vp = (struct vnode *)fp->f_data;
806 
807 	if (vp->v_type != VBLK)
808 		return NULL;
809 
810 	blkname = devsw_blk2name(major(vp->v_rdev));
811 	snprintf(diskname, sizeof diskname, "%s%llu", blkname,
812 	    (unsigned long long)DISKUNIT(vp->v_rdev));
813 
814 	for (i = 0; i < dl->dl_nnativedisks; i++) {
815 		nip = &dl->dl_nativedisks[i];
816 		if (strcmp(diskname, nip->ni_devname))
817 			continue;
818 		if (nip->ni_nmatches != 0)
819 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
820 	}
821 
822 	return NULL;
823 }
824 
825 
826 /*
827  * We come here in a last attempt to satisfy a Linux ioctl() call
828  */
829 int
830 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
831 {
832 	/* {
833 		syscallarg(int) fd;
834 		syscallarg(u_long) com;
835 		syscallarg(void *) data;
836 	} */
837 	struct sys_ioctl_args bia;
838 	u_long com;
839 	int error, error1;
840 #if (NWSDISPLAY > 0)
841 	struct vt_mode lvt;
842 	struct kbentry kbe;
843 #endif
844 	struct linux_hd_geometry hdg;
845 	struct linux_hd_big_geometry hdg_big;
846 	struct biosdisk_info *bip;
847 	file_t *fp;
848 	int fd;
849 	struct disklabel label, *labp;
850 	struct partinfo partp;
851 	int (*ioctlf)(struct file *, u_long, void *);
852 	u_long start, biostotal, realtotal;
853 	u_char heads, sectors;
854 	u_int cylinders;
855 	struct ioctl_pt pt;
856 
857 	fd = SCARG(uap, fd);
858 	SCARG(&bia, fd) = fd;
859 	SCARG(&bia, data) = SCARG(uap, data);
860 	com = SCARG(uap, com);
861 
862 	if ((fp = fd_getfile(fd)) == NULL)
863 		return (EBADF);
864 
865 	switch (com) {
866 #if (NWSDISPLAY > 0)
867 	case LINUX_KDGKBMODE:
868 		com = KDGKBMODE;
869 		break;
870 	case LINUX_KDSKBMODE:
871 		com = KDSKBMODE;
872 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
873 			SCARG(&bia, data) = (void *)K_RAW;
874 		break;
875 	case LINUX_KIOCSOUND:
876 		SCARG(&bia, data) =
877 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
878 		/* fall through */
879 	case LINUX_KDMKTONE:
880 		com = KDMKTONE;
881 		break;
882 	case LINUX_KDSETMODE:
883 		com = KDSETMODE;
884 		break;
885 	case LINUX_KDGETMODE:
886 		/* KD_* values are equal to the wscons numbers */
887 		com = WSDISPLAYIO_GMODE;
888 		break;
889 	case LINUX_KDENABIO:
890 		com = KDENABIO;
891 		break;
892 	case LINUX_KDDISABIO:
893 		com = KDDISABIO;
894 		break;
895 	case LINUX_KDGETLED:
896 		com = KDGETLED;
897 		break;
898 	case LINUX_KDSETLED:
899 		com = KDSETLED;
900 		break;
901 	case LINUX_VT_OPENQRY:
902 		com = VT_OPENQRY;
903 		break;
904 	case LINUX_VT_GETMODE:
905 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
906 		if (error != 0)
907 			goto out;
908 		lvt.relsig = native_to_linux_signo[lvt.relsig];
909 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
910 		lvt.frsig = native_to_linux_signo[lvt.frsig];
911 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
912 		goto out;
913 	case LINUX_VT_SETMODE:
914 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
915 		if (error != 0)
916 			goto out;
917 		lvt.relsig = linux_to_native_signo[lvt.relsig];
918 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
919 		lvt.frsig = linux_to_native_signo[lvt.frsig];
920 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
921 		goto out;
922 	case LINUX_VT_DISALLOCATE:
923 		/* XXX should use WSDISPLAYIO_DELSCREEN */
924 		error = 0;
925 		goto out;
926 	case LINUX_VT_RELDISP:
927 		com = VT_RELDISP;
928 		break;
929 	case LINUX_VT_ACTIVATE:
930 		com = VT_ACTIVATE;
931 		break;
932 	case LINUX_VT_WAITACTIVE:
933 		com = VT_WAITACTIVE;
934 		break;
935 	case LINUX_VT_GETSTATE:
936 		com = VT_GETSTATE;
937 		break;
938 	case LINUX_KDGKBTYPE:
939 	    {
940 		static const u_int8_t kb101 = KB_101;
941 
942 		/* This is what Linux does. */
943 		error = copyout(&kb101, SCARG(uap, data), 1);
944 		goto out;
945 	    }
946 	case LINUX_KDGKBENT:
947 		/*
948 		 * The Linux KDGKBENT ioctl is different from the
949 		 * SYSV original. So we handle it in machdep code.
950 		 * XXX We should use keyboard mapping information
951 		 * from wsdisplay, but this would be expensive.
952 		 */
953 		if ((error = copyin(SCARG(uap, data), &kbe,
954 				    sizeof(struct kbentry))))
955 			goto out;
956 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
957 		    || kbe.kb_index >= NR_KEYS) {
958 			error = EINVAL;
959 			goto out;
960 		}
961 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
962 		error = copyout(&kbe, SCARG(uap, data),
963 				sizeof(struct kbentry));
964 		goto out;
965 #endif
966 	case LINUX_HDIO_GETGEO:
967 	case LINUX_HDIO_GETGEO_BIG:
968 		/*
969 		 * Try to mimic Linux behaviour: return the BIOS geometry
970 		 * if possible (extending its # of cylinders if it's beyond
971 		 * the 1023 limit), fall back to the MI geometry (i.e.
972 		 * the real geometry) if not found, by returning an
973 		 * error. See common/linux_hdio.c
974 		 */
975 		bip = fd2biosinfo(curproc, fp);
976 		ioctlf = fp->f_ops->fo_ioctl;
977 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
978 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
979 		if (error != 0 && error1 != 0) {
980 			error = error1;
981 			goto out;
982 		}
983 		labp = error != 0 ? &label : partp.disklab;
984 		start = error1 != 0 ? partp.part->p_offset : 0;
985 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
986 		    && bip->bi_cyl != 0) {
987 			heads = bip->bi_head;
988 			sectors = bip->bi_sec;
989 			cylinders = bip->bi_cyl;
990 			biostotal = heads * sectors * cylinders;
991 			realtotal = labp->d_ntracks * labp->d_nsectors *
992 			    labp->d_ncylinders;
993 			if (realtotal > biostotal)
994 				cylinders = realtotal / (heads * sectors);
995 		} else {
996 			heads = labp->d_ntracks;
997 			cylinders = labp->d_ncylinders;
998 			sectors = labp->d_nsectors;
999 		}
1000 		if (com == LINUX_HDIO_GETGEO) {
1001 			hdg.start = start;
1002 			hdg.heads = heads;
1003 			hdg.cylinders = cylinders;
1004 			hdg.sectors = sectors;
1005 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1006 			goto out;
1007 		} else {
1008 			hdg_big.start = start;
1009 			hdg_big.heads = heads;
1010 			hdg_big.cylinders = cylinders;
1011 			hdg_big.sectors = sectors;
1012 			error = copyout(&hdg_big, SCARG(uap, data),
1013 			    sizeof hdg_big);
1014 			goto out;
1015 		}
1016 
1017 	default:
1018 		/*
1019 		 * Unknown to us. If it's on a device, just pass it through
1020 		 * using PTIOCLINUX, the device itself might be able to
1021 		 * make some sense of it.
1022 		 * XXX hack: if the function returns EJUSTRETURN,
1023 		 * it has stuffed a sysctl return value in pt.data.
1024 		 */
1025 		ioctlf = fp->f_ops->fo_ioctl;
1026 		pt.com = SCARG(uap, com);
1027 		pt.data = SCARG(uap, data);
1028 		error = ioctlf(fp, PTIOCLINUX, &pt);
1029 		if (error == EJUSTRETURN) {
1030 			retval[0] = (register_t)pt.data;
1031 			error = 0;
1032 		}
1033 
1034 		if (error == ENOTTY) {
1035 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1036 			    com));
1037 		}
1038 		goto out;
1039 	}
1040 	SCARG(&bia, com) = com;
1041 	error = sys_ioctl(curlwp, &bia, retval);
1042 out:
1043 	fd_putfile(fd);
1044 	return error;
1045 }
1046 
1047 /*
1048  * Set I/O permissions for a process. Just set the maximum level
1049  * right away (ignoring the argument), otherwise we would have
1050  * to rely on I/O permission maps, which are not implemented.
1051  */
1052 int
1053 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1054 {
1055 	/* {
1056 		syscallarg(int) level;
1057 	} */
1058 	struct trapframe *fp = l->l_md.md_regs;
1059 
1060 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1061 	    NULL, NULL, NULL, NULL) != 0)
1062 		return EPERM;
1063 	fp->tf_eflags |= PSL_IOPL;
1064 	*retval = 0;
1065 	return 0;
1066 }
1067 
1068 /*
1069  * See above. If a root process tries to set access to an I/O port,
1070  * just let it have the whole range.
1071  */
1072 int
1073 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1074 {
1075 	/* {
1076 		syscallarg(unsigned int) lo;
1077 		syscallarg(unsigned int) hi;
1078 		syscallarg(int) val;
1079 	} */
1080 	struct trapframe *fp = l->l_md.md_regs;
1081 
1082 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1083 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1084 	    NULL, NULL) != 0)
1085 		return EPERM;
1086 	if (SCARG(uap, val))
1087 		fp->tf_eflags |= PSL_IOPL;
1088 	*retval = 0;
1089 	return 0;
1090 }
1091 
1092 int
1093 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1094     void *arg)
1095 {
1096 	return 0;
1097 }
1098 
1099 const char *
1100 linux_get_uname_arch(void)
1101 {
1102 	static char uname_arch[5] = "i386";
1103 
1104 	if (uname_arch[1] == '3')
1105 		uname_arch[1] += cpu_class;
1106 	return uname_arch;
1107 }
1108 
1109 #ifdef LINUX_NPTL
/*
 * linux_get_newtls: fetch the new TLS pointer for an lwp.
 *
 * XXX: Implement me.  This is currently a stub that ignores its argument
 * and always returns NULL.
 * NOTE(review): a real implementation would presumably pull the value
 * out of the lwp's trapframe (l->l_md.md_regs) -- confirm.
 */
void *
linux_get_newtls(struct lwp *l)
{
	void *newtls = NULL;

	(void)l;
	return newtls;
}
1120 
/*
 * linux_set_newtls: install a new TLS pointer for an lwp.
 *
 * XXX: Implement me.  This is currently a stub that ignores both
 * arguments and always returns 0.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{
	const int error = 0;

	(void)l;
	(void)tls;
	return error;
}
1127 #endif
1128