xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: linux_machdep.c,v 1.149 2010/07/07 01:30:34 chs Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden, and by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.149 2010/07/07 01:30:34 chs Exp $");
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/buf.h>
46 #include <sys/reboot.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/file.h>
50 #include <sys/callout.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/msgbuf.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/device.h>
57 #include <sys/syscallargs.h>
58 #include <sys/filedesc.h>
59 #include <sys/exec_elf.h>
60 #include <sys/disklabel.h>
61 #include <sys/ioctl.h>
62 #include <sys/wait.h>
63 #include <sys/kauth.h>
64 #include <sys/kmem.h>
65 
66 #include <miscfs/specfs/specdev.h>
67 
68 #include <compat/linux/common/linux_types.h>
69 #include <compat/linux/common/linux_signal.h>
70 #include <compat/linux/common/linux_util.h>
71 #include <compat/linux/common/linux_ioctl.h>
72 #include <compat/linux/common/linux_hdio.h>
73 #include <compat/linux/common/linux_exec.h>
74 #include <compat/linux/common/linux_machdep.h>
75 #include <compat/linux/common/linux_errno.h>
76 
77 #include <compat/linux/linux_syscallargs.h>
78 
79 #include <sys/cpu.h>
80 #include <machine/cpufunc.h>
81 #include <machine/psl.h>
82 #include <machine/reg.h>
83 #include <machine/segments.h>
84 #include <machine/specialreg.h>
85 #include <machine/sysarch.h>
86 #include <machine/vm86.h>
87 #include <machine/vmparam.h>
88 
89 /*
90  * To see whether wscons is configured (for virtual console ioctl calls).
91  */
92 #if defined(_KERNEL_OPT)
93 #include "wsdisplay.h"
94 #endif
95 #if (NWSDISPLAY > 0)
96 #include <dev/wscons/wsconsio.h>
97 #include <dev/wscons/wsdisplay_usl_io.h>
98 #if defined(_KERNEL_OPT)
99 #include "opt_xserver.h"
100 #endif
101 #endif
102 
/*
 * Debug tracing.  When DEBUG_LINUX is defined, DPRINTF((fmt, ...)) expands
 * to a uprintf() call; otherwise it expands to nothing.  The argument list
 * must be wrapped in its own pair of parentheses at every call site.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/* Maps an open file to a BIOS disk description (defined later in this file). */
static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
/* Disk list consulted by fd2biosinfo(); defined outside this file. */
extern struct disklist *x86_alldisks;
/* Signal-frame construction and restoration helpers (defined below). */
static void linux_save_ucontext(struct lwp *, struct trapframe *,
    const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    const sigset_t *, struct linux_sigcontext *);
static int linux_restore_sigcontext(struct lwp *,
    struct linux_sigcontext *, register_t *);
/* The two signal delivery flavours selected by linux_sendsig(). */
static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);

/*
 * Signal trampolines.  linux_rt_sendsig() uses the distance between these
 * two symbols to locate the rt trampoline relative to ps_sigcode.
 */
extern char linux_sigcode[], linux_rt_sigcode[];
121 
122 /*
123  * Deal with some i386-specific things in the Linux emulation code.
124  */
125 
126 void
127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
128 {
129 	struct pcb *pcb = lwp_getpcb(l);
130 	struct trapframe *tf;
131 
132 #if NNPX > 0
133 	/* If we were using the FPU, forget about it. */
134 	if (npxproc == l)
135 		npxdrop();
136 #endif
137 
138 #ifdef USER_LDT
139 	pmap_ldt_cleanup(l);
140 #endif
141 
142 	l->l_md.md_flags &= ~MDL_USEDFPU;
143 
144 	if (i386_use_fxsave) {
145 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
146 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
147 	} else
148 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
149 
150 	tf = l->l_md.md_regs;
151 	tf->tf_gs = 0;
152 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
153 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
154 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
155 	tf->tf_edi = 0;
156 	tf->tf_esi = 0;
157 	tf->tf_ebp = 0;
158 	tf->tf_ebx = (int)l->l_proc->p_psstr;
159 	tf->tf_edx = 0;
160 	tf->tf_ecx = 0;
161 	tf->tf_eax = 0;
162 	tf->tf_eip = epp->ep_entry;
163 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
164 	tf->tf_eflags = PSL_USERSET;
165 	tf->tf_esp = stack;
166 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
167 }
168 
169 /*
170  * Send an interrupt to process.
171  *
172  * Stack is set up to allow sigcode stored
173  * in u. to call routine, followed by kcall
174  * to sigreturn routine below.  After sigreturn
175  * resets the signal mask, the stack, and the
176  * frame pointer, it returns to the user
177  * specified pc, psl.
178  */
179 
180 void
181 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
182 {
183 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
184 		linux_rt_sendsig(ksi, mask);
185 	else
186 		linux_old_sendsig(ksi, mask);
187 }
188 
189 
190 static void
191 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
192 {
193 	uc->uc_flags = 0;
194 	uc->uc_link = NULL;
195 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
196 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
197 	native_to_linux_sigset(&uc->uc_sigmask, mask);
198 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
199 }
200 
201 static void
202 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
203     const sigset_t *mask, struct linux_sigcontext *sc)
204 {
205 	struct pcb *pcb = lwp_getpcb(l);
206 
207 	/* Save register context. */
208 #ifdef VM86
209 	if (tf->tf_eflags & PSL_VM) {
210 		sc->sc_gs = tf->tf_vm86_gs;
211 		sc->sc_fs = tf->tf_vm86_fs;
212 		sc->sc_es = tf->tf_vm86_es;
213 		sc->sc_ds = tf->tf_vm86_ds;
214 		sc->sc_eflags = get_vflags(l);
215 	} else
216 #endif
217 	{
218 		sc->sc_gs = tf->tf_gs;
219 		sc->sc_fs = tf->tf_fs;
220 		sc->sc_es = tf->tf_es;
221 		sc->sc_ds = tf->tf_ds;
222 		sc->sc_eflags = tf->tf_eflags;
223 	}
224 	sc->sc_edi = tf->tf_edi;
225 	sc->sc_esi = tf->tf_esi;
226 	sc->sc_esp = tf->tf_esp;
227 	sc->sc_ebp = tf->tf_ebp;
228 	sc->sc_ebx = tf->tf_ebx;
229 	sc->sc_edx = tf->tf_edx;
230 	sc->sc_ecx = tf->tf_ecx;
231 	sc->sc_eax = tf->tf_eax;
232 	sc->sc_eip = tf->tf_eip;
233 	sc->sc_cs = tf->tf_cs;
234 	sc->sc_esp_at_signal = tf->tf_esp;
235 	sc->sc_ss = tf->tf_ss;
236 	sc->sc_err = tf->tf_err;
237 	sc->sc_trapno = tf->tf_trapno;
238 	sc->sc_cr2 = pcb->pcb_cr2;
239 	sc->sc_387 = NULL;
240 
241 	/* Save signal stack. */
242 	/* Linux doesn't save the onstack flag in sigframe */
243 
244 	/* Save signal mask. */
245 	native_to_linux_old_sigset(&sc->sc_mask, mask);
246 }
247 
248 static void
249 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
250 {
251 	struct lwp *l = curlwp;
252 	struct proc *p = l->l_proc;
253 	struct trapframe *tf;
254 	struct linux_rt_sigframe *fp, frame;
255 	int onstack, error;
256 	linux_siginfo_t *lsi;
257 	int sig = ksi->ksi_signo;
258 	sig_t catcher = SIGACTION(p, sig).sa_handler;
259 	struct sigaltstack *sas = &l->l_sigstk;
260 
261 	tf = l->l_md.md_regs;
262 	/* Do we need to jump onto the signal stack? */
263 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
264 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
265 
266 
267 	/* Allocate space for the signal handler context. */
268 	if (onstack)
269 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
270 		    sas->ss_size);
271 	else
272 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
273 	fp--;
274 
275 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
276 	    onstack, fp, sig, tf->tf_eip,
277 	    ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
278 
279 	/* Build stack frame for signal trampoline. */
280 	frame.sf_handler = catcher;
281 	frame.sf_sig = native_to_linux_signo[sig];
282 	frame.sf_sip = &fp->sf_si;
283 	frame.sf_ucp = &fp->sf_uc;
284 
285 	/*
286 	 * XXX: the following code assumes that the constants for
287 	 * siginfo are the same between linux and NetBSD.
288 	 */
289 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
290 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
291 	lsi->lsi_code = native_to_linux_si_code(ksi->ksi_code);
292 	switch (lsi->lsi_signo = frame.sf_sig) {
293 	case LINUX_SIGILL:
294 	case LINUX_SIGFPE:
295 	case LINUX_SIGSEGV:
296 	case LINUX_SIGBUS:
297 	case LINUX_SIGTRAP:
298 		lsi->lsi_addr = ksi->ksi_addr;
299 		break;
300 	case LINUX_SIGCHLD:
301 		lsi->lsi_uid = ksi->ksi_uid;
302 		lsi->lsi_pid = ksi->ksi_pid;
303 		lsi->lsi_utime = ksi->ksi_utime;
304 		lsi->lsi_stime = ksi->ksi_stime;
305 		lsi->lsi_status =
306 		    native_to_linux_si_status(ksi->ksi_code, ksi->ksi_status);
307 		break;
308 	case LINUX_SIGIO:
309 		lsi->lsi_band = ksi->ksi_band;
310 		lsi->lsi_fd = ksi->ksi_fd;
311 		break;
312 	default:
313 		lsi->lsi_uid = ksi->ksi_uid;
314 		lsi->lsi_pid = ksi->ksi_pid;
315 		if (lsi->lsi_signo == LINUX_SIGALRM ||
316 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
317 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
318 		break;
319 	}
320 
321 	/* Save register context. */
322 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
323 	sendsig_reset(l, sig);
324 
325 	mutex_exit(p->p_lock);
326 	error = copyout(&frame, fp, sizeof(frame));
327 	mutex_enter(p->p_lock);
328 
329 	if (error != 0) {
330 		/*
331 		 * Process has trashed its stack; give it an illegal
332 		 * instruction to halt it in its tracks.
333 		 */
334 		sigexit(l, SIGILL);
335 		/* NOTREACHED */
336 	}
337 
338 	/*
339 	 * Build context to run handler in.
340 	 */
341 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
342 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
343 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
344 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
345 	    (linux_rt_sigcode - linux_sigcode);
346 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
347 	tf->tf_eflags &= ~PSL_CLEARSIG;
348 	tf->tf_esp = (int)fp;
349 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
350 
351 	/* Remember that we're now on the signal stack. */
352 	if (onstack)
353 		sas->ss_flags |= SS_ONSTACK;
354 }
355 
356 static void
357 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
358 {
359 	struct lwp *l = curlwp;
360 	struct proc *p = l->l_proc;
361 	struct trapframe *tf;
362 	struct linux_sigframe *fp, frame;
363 	int onstack, error;
364 	int sig = ksi->ksi_signo;
365 	sig_t catcher = SIGACTION(p, sig).sa_handler;
366 	struct sigaltstack *sas = &l->l_sigstk;
367 
368 	tf = l->l_md.md_regs;
369 
370 	/* Do we need to jump onto the signal stack? */
371 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
372 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
373 
374 	/* Allocate space for the signal handler context. */
375 	if (onstack)
376 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
377 		    sas->ss_size);
378 	else
379 		fp = (struct linux_sigframe *)tf->tf_esp;
380 	fp--;
381 
382 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
383 	    onstack, fp, sig, tf->tf_eip,
384 	    ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
385 
386 	/* Build stack frame for signal trampoline. */
387 	frame.sf_handler = catcher;
388 	frame.sf_sig = native_to_linux_signo[sig];
389 
390 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
391 	sendsig_reset(l, sig);
392 
393 	mutex_exit(p->p_lock);
394 	error = copyout(&frame, fp, sizeof(frame));
395 	mutex_enter(p->p_lock);
396 
397 	if (error != 0) {
398 		/*
399 		 * Process has trashed its stack; give it an illegal
400 		 * instruction to halt it in its tracks.
401 		 */
402 		sigexit(l, SIGILL);
403 		/* NOTREACHED */
404 	}
405 
406 	/*
407 	 * Build context to run handler in.
408 	 */
409 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
410 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
411 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
412 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
413 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
414 	tf->tf_eflags &= ~PSL_CLEARSIG;
415 	tf->tf_esp = (int)fp;
416 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
417 
418 	/* Remember that we're now on the signal stack. */
419 	if (onstack)
420 		sas->ss_flags |= SS_ONSTACK;
421 }
422 
423 /*
424  * System call to cleanup state after a signal
425  * has been taken.  Reset signal mask and
426  * stack state from context left by sendsig (above).
427  * Return to previous pc and psl as specified by
428  * context left by sendsig. Check carefully to
429  * make sure that the user has not modified the
430  * psl to gain improper privileges or to cause
431  * a machine fault.
432  */
433 int
434 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
435 {
436 	/* {
437 		syscallarg(struct linux_ucontext *) ucp;
438 	} */
439 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
440 	int error;
441 
442 	/*
443 	 * The trampoline code hands us the context.
444 	 * It is unsafe to keep track of it ourselves, in the event that a
445 	 * program jumps out of a signal handler.
446 	 */
447 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
448 		return error;
449 
450 	/* XXX XAX we can do better here by using more of the ucontext */
451 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
452 }
453 
454 int
455 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
456 {
457 	/* {
458 		syscallarg(struct linux_sigcontext *) scp;
459 	} */
460 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
461 	int error;
462 
463 	/*
464 	 * The trampoline code hands us the context.
465 	 * It is unsafe to keep track of it ourselves, in the event that a
466 	 * program jumps out of a signal handler.
467 	 */
468 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
469 		return error;
470 	return linux_restore_sigcontext(l, &context, retval);
471 }
472 
473 static int
474 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
475     register_t *retval)
476 {
477 	struct proc *p = l->l_proc;
478 	struct sigaltstack *sas = &l->l_sigstk;
479 	struct trapframe *tf;
480 	sigset_t mask;
481 	ssize_t ss_gap;
482 
483 	/* Restore register context. */
484 	tf = l->l_md.md_regs;
485 	DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
486 
487 #ifdef VM86
488 	if (scp->sc_eflags & PSL_VM) {
489 		void syscall_vm86(struct trapframe *);
490 
491 		tf->tf_vm86_gs = scp->sc_gs;
492 		tf->tf_vm86_fs = scp->sc_fs;
493 		tf->tf_vm86_es = scp->sc_es;
494 		tf->tf_vm86_ds = scp->sc_ds;
495 		set_vflags(l, scp->sc_eflags);
496 		p->p_md.md_syscall = syscall_vm86;
497 	} else
498 #endif
499 	{
500 		/*
501 		 * Check for security violations.  If we're returning to
502 		 * protected mode, the CPU will validate the segment registers
503 		 * automatically and generate a trap on violations.  We handle
504 		 * the trap, rather than doing all of the checking here.
505 		 */
506 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
507 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
508 			return EINVAL;
509 
510 		tf->tf_gs = scp->sc_gs;
511 		tf->tf_fs = scp->sc_fs;
512 		tf->tf_es = scp->sc_es;
513 		tf->tf_ds = scp->sc_ds;
514 #ifdef VM86
515 		if (tf->tf_eflags & PSL_VM)
516 			(*p->p_emul->e_syscall_intern)(p);
517 #endif
518 		tf->tf_eflags = scp->sc_eflags;
519 	}
520 	tf->tf_edi = scp->sc_edi;
521 	tf->tf_esi = scp->sc_esi;
522 	tf->tf_ebp = scp->sc_ebp;
523 	tf->tf_ebx = scp->sc_ebx;
524 	tf->tf_edx = scp->sc_edx;
525 	tf->tf_ecx = scp->sc_ecx;
526 	tf->tf_eax = scp->sc_eax;
527 	tf->tf_eip = scp->sc_eip;
528 	tf->tf_cs = scp->sc_cs;
529 	tf->tf_esp = scp->sc_esp_at_signal;
530 	tf->tf_ss = scp->sc_ss;
531 
532 	/* Restore signal stack. */
533 	/*
534 	 * Linux really does it this way; it doesn't have space in sigframe
535 	 * to save the onstack flag.
536 	 */
537 	mutex_enter(p->p_lock);
538 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
539 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
540 		sas->ss_flags |= SS_ONSTACK;
541 	else
542 		sas->ss_flags &= ~SS_ONSTACK;
543 
544 	/* Restore signal mask. */
545 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
546 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
547 	mutex_exit(p->p_lock);
548 
549 	DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
550 	return EJUSTRETURN;
551 }
552 
553 #ifdef USER_LDT
554 
555 static int
556 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
557     register_t *retval)
558 {
559 	struct x86_get_ldt_args gl;
560 	int error;
561 	union descriptor *ldt_buf;
562 	size_t sz;
563 
564 	/*
565 	 * I've checked the linux code - this function is asymetric with
566 	 * linux_write_ldt, and returns raw ldt entries.
567 	 * NB, the code I saw zerod the spare parts of the user buffer.
568 	 */
569 
570 	DPRINTF(("linux_read_ldt!"));
571 
572 	sz = 8192 * sizeof(*ldt_buf);
573 	ldt_buf = kmem_zalloc(sz, KM_SLEEP);
574 	gl.start = 0;
575 	gl.desc = NULL;
576 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
577 	error = x86_get_ldt1(l, &gl, ldt_buf);
578 	/* NB gl.num might have changed */
579 	if (error == 0) {
580 		*retval = gl.num * sizeof *ldt;
581 		error = copyout(ldt_buf, SCARG(uap, ptr),
582 		    gl.num * sizeof *ldt_buf);
583 	}
584 	kmem_free(ldt_buf, sz);
585 
586 	return error;
587 }
588 
/*
 * Argument structure copied in from userland by linux_write_ldt(); the
 * syscall's bytecount must equal sizeof(struct linux_ldt_info).  The
 * comments describe how linux_write_ldt() turns each member into a
 * segment descriptor field.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set; must be < 8192 */
	u_long base_addr;		/* split into sd_lobase / sd_hibase */
	u_int limit;			/* split into sd_lolimit / sd_hilimit */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into sd_type; 3 is restricted */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless oldmode */
};
600 
601 static int
602 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
603     int oldmode)
604 {
605 	struct linux_ldt_info ldt_info;
606 	union descriptor d;
607 	struct x86_set_ldt_args sl;
608 	int error;
609 
610 	DPRINTF(("linux_write_ldt %d\n", oldmode));
611 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
612 		return (EINVAL);
613 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
614 		return error;
615 	if (ldt_info.entry_number >= 8192)
616 		return (EINVAL);
617 	if (ldt_info.contents == 3) {
618 		if (oldmode)
619 			return (EINVAL);
620 		if (ldt_info.seg_not_present)
621 			return (EINVAL);
622 	}
623 
624 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
625 	    (oldmode || (ldt_info.contents == 0 &&
626 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
627 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
628 	    ldt_info.useable == 0))) {
629 		/* this means you should zero the ldt */
630 		(void)memset(&d, 0, sizeof(d));
631 	} else {
632 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
633 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
634 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
635 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
636 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
637 		    (!ldt_info.read_exec_only << 1);
638 		d.sd.sd_dpl = SEL_UPL;
639 		d.sd.sd_p = !ldt_info.seg_not_present;
640 		d.sd.sd_def32 = ldt_info.seg_32bit;
641 		d.sd.sd_gran = ldt_info.limit_in_pages;
642 		if (!oldmode)
643 			d.sd.sd_xx = ldt_info.useable;
644 		else
645 			d.sd.sd_xx = 0;
646 	}
647 	sl.start = ldt_info.entry_number;
648 	sl.desc = NULL;
649 	sl.num = 1;
650 
651 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
652 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
653 
654 	return x86_set_ldt1(l, &sl, &d);
655 }
656 
657 #endif /* USER_LDT */
658 
659 int
660 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
661 {
662 	/* {
663 		syscallarg(int) func;
664 		syscallarg(void *) ptr;
665 		syscallarg(size_t) bytecount;
666 	} */
667 
668 	switch (SCARG(uap, func)) {
669 #ifdef USER_LDT
670 	case 0:
671 		return linux_read_ldt(l, (const void *)uap, retval);
672 	case 1:
673 		return linux_write_ldt(l, (const void *)uap, 1);
674 	case 2:
675 #ifdef notyet
676 		return linux_read_default_ldt(l, (const void *)uap, retval);
677 #else
678 		return (ENOSYS);
679 #endif
680 	case 0x11:
681 		return linux_write_ldt(l, (const void *)uap, 0);
682 #endif /* USER_LDT */
683 
684 	default:
685 		return (ENOSYS);
686 	}
687 }
688 
689 /*
690  * XXX Pathetic hack to make svgalib work. This will fake the major
691  * device number of an opened VT so that svgalib likes it. grmbl.
692  * Should probably do it 'wrong the right way' and use a mapping
693  * array for all major device numbers, and map linux_mknod too.
694  */
695 dev_t
696 linux_fakedev(dev_t dev, int raw)
697 {
698 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
699 	const struct cdevsw *cd = cdevsw_lookup(dev);
700 
701 	if (raw) {
702 #if (NWSDISPLAY > 0)
703 		extern const struct cdevsw wsdisplay_cdevsw;
704 		if (cd == &wsdisplay_cdevsw)
705 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
706 #endif
707 	}
708 
709 	if (cd == &ptc_cdevsw)
710 		return makedev(LINUX_PTC_MAJOR, minor(dev));
711 	if (cd == &pts_cdevsw)
712 		return makedev(LINUX_PTS_MAJOR, minor(dev));
713 
714 	return dev;
715 }
716 
717 #if (NWSDISPLAY > 0)
718 /*
719  * That's not complete, but enough to get an X server running.
720  */
/*
 * Static key tables handed back by the LINUX_KDGKBENT ioctl.  Each table
 * has NR_KEYS entries and is indexed by the kb_index value supplied by the
 * caller; the selected entry is returned verbatim as kb_value.
 * NOTE(review): the 16-bit values appear to follow the Linux keymap
 * encoding (type in the high byte, value in the low byte) - confirm
 * against the Linux keyboard headers before editing any entry.
 */
#define NR_KEYS 128
/* Table 0: no modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Table 1: shift. */
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Tables 2 and 3: AltGr (linux_keytabs lists this table twice). */
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Table 4: control. */
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
791 
/*
 * Key tables selectable through the kb_table field of a LINUX_KDGKBENT
 * request.  The handler rejects a kb_table at or beyond the number of
 * entries here, and a kb_index at or beyond NR_KEYS.  Slots 2 and 3 both
 * refer to altgr_map.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
795 #endif
796 
797 static struct biosdisk_info *
798 fd2biosinfo(struct proc *p, struct file *fp)
799 {
800 	struct vnode *vp;
801 	const char *blkname;
802 	char diskname[16];
803 	int i;
804 	struct nativedisk_info *nip;
805 	struct disklist *dl = x86_alldisks;
806 
807 	if (fp->f_type != DTYPE_VNODE)
808 		return NULL;
809 	vp = (struct vnode *)fp->f_data;
810 
811 	if (vp->v_type != VBLK)
812 		return NULL;
813 
814 	blkname = devsw_blk2name(major(vp->v_rdev));
815 	snprintf(diskname, sizeof diskname, "%s%llu", blkname,
816 	    (unsigned long long)DISKUNIT(vp->v_rdev));
817 
818 	for (i = 0; i < dl->dl_nnativedisks; i++) {
819 		nip = &dl->dl_nativedisks[i];
820 		if (strcmp(diskname, nip->ni_devname))
821 			continue;
822 		if (nip->ni_nmatches != 0)
823 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
824 	}
825 
826 	return NULL;
827 }
828 
829 
830 /*
831  * We come here in a last attempt to satisfy a Linux ioctl() call
832  */
833 int
834 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
835 {
836 	/* {
837 		syscallarg(int) fd;
838 		syscallarg(u_long) com;
839 		syscallarg(void *) data;
840 	} */
841 	struct sys_ioctl_args bia;
842 	u_long com;
843 	int error, error1;
844 #if (NWSDISPLAY > 0)
845 	struct vt_mode lvt;
846 	struct kbentry kbe;
847 #endif
848 	struct linux_hd_geometry hdg;
849 	struct linux_hd_big_geometry hdg_big;
850 	struct biosdisk_info *bip;
851 	file_t *fp;
852 	int fd;
853 	struct disklabel label, *labp;
854 	struct partinfo partp;
855 	int (*ioctlf)(struct file *, u_long, void *);
856 	u_long start, biostotal, realtotal;
857 	u_char heads, sectors;
858 	u_int cylinders;
859 	struct ioctl_pt pt;
860 
861 	fd = SCARG(uap, fd);
862 	SCARG(&bia, fd) = fd;
863 	SCARG(&bia, data) = SCARG(uap, data);
864 	com = SCARG(uap, com);
865 
866 	if ((fp = fd_getfile(fd)) == NULL)
867 		return (EBADF);
868 
869 	switch (com) {
870 #if (NWSDISPLAY > 0)
871 	case LINUX_KDGKBMODE:
872 		com = KDGKBMODE;
873 		break;
874 	case LINUX_KDSKBMODE:
875 		com = KDSKBMODE;
876 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
877 			SCARG(&bia, data) = (void *)K_RAW;
878 		break;
879 	case LINUX_KIOCSOUND:
880 		SCARG(&bia, data) =
881 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
882 		/* fall through */
883 	case LINUX_KDMKTONE:
884 		com = KDMKTONE;
885 		break;
886 	case LINUX_KDSETMODE:
887 		com = KDSETMODE;
888 		break;
889 	case LINUX_KDGETMODE:
890 		/* KD_* values are equal to the wscons numbers */
891 		com = WSDISPLAYIO_GMODE;
892 		break;
893 	case LINUX_KDENABIO:
894 		com = KDENABIO;
895 		break;
896 	case LINUX_KDDISABIO:
897 		com = KDDISABIO;
898 		break;
899 	case LINUX_KDGETLED:
900 		com = KDGETLED;
901 		break;
902 	case LINUX_KDSETLED:
903 		com = KDSETLED;
904 		break;
905 	case LINUX_VT_OPENQRY:
906 		com = VT_OPENQRY;
907 		break;
908 	case LINUX_VT_GETMODE:
909 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
910 		if (error != 0)
911 			goto out;
912 		lvt.relsig = native_to_linux_signo[lvt.relsig];
913 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
914 		lvt.frsig = native_to_linux_signo[lvt.frsig];
915 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
916 		goto out;
917 	case LINUX_VT_SETMODE:
918 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
919 		if (error != 0)
920 			goto out;
921 		lvt.relsig = linux_to_native_signo[lvt.relsig];
922 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
923 		lvt.frsig = linux_to_native_signo[lvt.frsig];
924 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
925 		goto out;
926 	case LINUX_VT_DISALLOCATE:
927 		/* XXX should use WSDISPLAYIO_DELSCREEN */
928 		error = 0;
929 		goto out;
930 	case LINUX_VT_RELDISP:
931 		com = VT_RELDISP;
932 		break;
933 	case LINUX_VT_ACTIVATE:
934 		com = VT_ACTIVATE;
935 		break;
936 	case LINUX_VT_WAITACTIVE:
937 		com = VT_WAITACTIVE;
938 		break;
939 	case LINUX_VT_GETSTATE:
940 		com = VT_GETSTATE;
941 		break;
942 	case LINUX_KDGKBTYPE:
943 	    {
944 		static const u_int8_t kb101 = KB_101;
945 
946 		/* This is what Linux does. */
947 		error = copyout(&kb101, SCARG(uap, data), 1);
948 		goto out;
949 	    }
950 	case LINUX_KDGKBENT:
951 		/*
952 		 * The Linux KDGKBENT ioctl is different from the
953 		 * SYSV original. So we handle it in machdep code.
954 		 * XXX We should use keyboard mapping information
955 		 * from wsdisplay, but this would be expensive.
956 		 */
957 		if ((error = copyin(SCARG(uap, data), &kbe,
958 				    sizeof(struct kbentry))))
959 			goto out;
960 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
961 		    || kbe.kb_index >= NR_KEYS) {
962 			error = EINVAL;
963 			goto out;
964 		}
965 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
966 		error = copyout(&kbe, SCARG(uap, data),
967 				sizeof(struct kbentry));
968 		goto out;
969 #endif
970 	case LINUX_HDIO_GETGEO:
971 	case LINUX_HDIO_GETGEO_BIG:
972 		/*
973 		 * Try to mimic Linux behaviour: return the BIOS geometry
974 		 * if possible (extending its # of cylinders if it's beyond
975 		 * the 1023 limit), fall back to the MI geometry (i.e.
976 		 * the real geometry) if not found, by returning an
977 		 * error. See common/linux_hdio.c
978 		 */
979 		bip = fd2biosinfo(curproc, fp);
980 		ioctlf = fp->f_ops->fo_ioctl;
981 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
982 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
983 		if (error != 0 && error1 != 0) {
984 			error = error1;
985 			goto out;
986 		}
987 		labp = error != 0 ? &label : partp.disklab;
988 		start = error1 != 0 ? partp.part->p_offset : 0;
989 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
990 		    && bip->bi_cyl != 0) {
991 			heads = bip->bi_head;
992 			sectors = bip->bi_sec;
993 			cylinders = bip->bi_cyl;
994 			biostotal = heads * sectors * cylinders;
995 			realtotal = labp->d_ntracks * labp->d_nsectors *
996 			    labp->d_ncylinders;
997 			if (realtotal > biostotal)
998 				cylinders = realtotal / (heads * sectors);
999 		} else {
1000 			heads = labp->d_ntracks;
1001 			cylinders = labp->d_ncylinders;
1002 			sectors = labp->d_nsectors;
1003 		}
1004 		if (com == LINUX_HDIO_GETGEO) {
1005 			hdg.start = start;
1006 			hdg.heads = heads;
1007 			hdg.cylinders = cylinders;
1008 			hdg.sectors = sectors;
1009 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1010 			goto out;
1011 		} else {
1012 			hdg_big.start = start;
1013 			hdg_big.heads = heads;
1014 			hdg_big.cylinders = cylinders;
1015 			hdg_big.sectors = sectors;
1016 			error = copyout(&hdg_big, SCARG(uap, data),
1017 			    sizeof hdg_big);
1018 			goto out;
1019 		}
1020 
1021 	default:
1022 		/*
1023 		 * Unknown to us. If it's on a device, just pass it through
1024 		 * using PTIOCLINUX, the device itself might be able to
1025 		 * make some sense of it.
1026 		 * XXX hack: if the function returns EJUSTRETURN,
1027 		 * it has stuffed a sysctl return value in pt.data.
1028 		 */
1029 		ioctlf = fp->f_ops->fo_ioctl;
1030 		pt.com = SCARG(uap, com);
1031 		pt.data = SCARG(uap, data);
1032 		error = ioctlf(fp, PTIOCLINUX, &pt);
1033 		if (error == EJUSTRETURN) {
1034 			retval[0] = (register_t)pt.data;
1035 			error = 0;
1036 		}
1037 
1038 		if (error == ENOTTY) {
1039 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1040 			    com));
1041 		}
1042 		goto out;
1043 	}
1044 	SCARG(&bia, com) = com;
1045 	error = sys_ioctl(curlwp, &bia, retval);
1046 out:
1047 	fd_putfile(fd);
1048 	return error;
1049 }
1050 
1051 /*
1052  * Set I/O permissions for a process. Just set the maximum level
1053  * right away (ignoring the argument), otherwise we would have
1054  * to rely on I/O permission maps, which are not implemented.
1055  */
1056 int
1057 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1058 {
1059 	/* {
1060 		syscallarg(int) level;
1061 	} */
1062 	struct trapframe *fp = l->l_md.md_regs;
1063 
1064 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1065 	    NULL, NULL, NULL, NULL) != 0)
1066 		return EPERM;
1067 	fp->tf_eflags |= PSL_IOPL;
1068 	*retval = 0;
1069 	return 0;
1070 }
1071 
1072 /*
1073  * See above. If a root process tries to set access to an I/O port,
1074  * just let it have the whole range.
1075  */
1076 int
1077 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1078 {
1079 	/* {
1080 		syscallarg(unsigned int) lo;
1081 		syscallarg(unsigned int) hi;
1082 		syscallarg(int) val;
1083 	} */
1084 	struct trapframe *fp = l->l_md.md_regs;
1085 
1086 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1087 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1088 	    NULL, NULL) != 0)
1089 		return EPERM;
1090 	if (SCARG(uap, val))
1091 		fp->tf_eflags |= PSL_IOPL;
1092 	*retval = 0;
1093 	return 0;
1094 }
1095 
1096 int
1097 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1098     void *arg)
1099 {
1100 	return 0;
1101 }
1102 
1103 const char *
1104 linux_get_uname_arch(void)
1105 {
1106 	static char uname_arch[5] = "i386";
1107 
1108 	if (uname_arch[1] == '3')
1109 		uname_arch[1] += cpu_class;
1110 	return uname_arch;
1111 }
1112