xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision c8a35b6227034951e874c2def577388e79ede4a5)
1 /*	$NetBSD: linux_machdep.c,v 1.142 2009/01/11 02:45:48 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.142 2009/01/11 02:45:48 christos Exp $");
34 
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/user.h>
46 #include <sys/buf.h>
47 #include <sys/reboot.h>
48 #include <sys/conf.h>
49 #include <sys/exec.h>
50 #include <sys/file.h>
51 #include <sys/callout.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 #include <sys/msgbuf.h>
55 #include <sys/mount.h>
56 #include <sys/vnode.h>
57 #include <sys/device.h>
58 #include <sys/syscallargs.h>
59 #include <sys/filedesc.h>
60 #include <sys/exec_elf.h>
61 #include <sys/disklabel.h>
62 #include <sys/ioctl.h>
63 #include <sys/wait.h>
64 #include <sys/kauth.h>
65 
66 #include <miscfs/specfs/specdev.h>
67 
68 #include <compat/linux/common/linux_types.h>
69 #include <compat/linux/common/linux_signal.h>
70 #include <compat/linux/common/linux_util.h>
71 #include <compat/linux/common/linux_ioctl.h>
72 #include <compat/linux/common/linux_hdio.h>
73 #include <compat/linux/common/linux_exec.h>
74 #include <compat/linux/common/linux_machdep.h>
75 #include <compat/linux/common/linux_errno.h>
76 
77 #include <compat/linux/linux_syscallargs.h>
78 
79 #include <sys/cpu.h>
80 #include <machine/cpufunc.h>
81 #include <machine/psl.h>
82 #include <machine/reg.h>
83 #include <machine/segments.h>
84 #include <machine/specialreg.h>
85 #include <machine/sysarch.h>
86 #include <machine/vm86.h>
87 #include <machine/vmparam.h>
88 
89 /*
90  * To see whether wscons is configured (for virtual console ioctl calls).
91  */
92 #if defined(_KERNEL_OPT)
93 #include "wsdisplay.h"
94 #endif
95 #if (NWSDISPLAY > 0)
96 #include <dev/wscons/wsconsio.h>
97 #include <dev/wscons/wsdisplay_usl_io.h>
98 #if defined(_KERNEL_OPT)
99 #include "opt_xserver.h"
100 #endif
101 #endif
102 
/*
 * Debug tracing.  DPRINTF takes one parenthesised argument list, e.g.
 * DPRINTF(("fmt %d\n", x)); with DEBUG_LINUX defined that list is pasted
 * directly after uprintf, otherwise the macro expands to nothing.
 */
#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a) uprintf a
#endif
108 
109 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
110 extern struct disklist *x86_alldisks;
111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
112     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
114     const sigset_t *, struct linux_sigcontext *);
115 static int linux_restore_sigcontext(struct lwp *,
116     struct linux_sigcontext *, register_t *);
117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
119 
120 extern char linux_sigcode[], linux_rt_sigcode[];
121 /*
122  * Deal with some i386-specific things in the Linux emulation code.
123  */
124 
125 void
126 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
127 {
128 	struct pcb *pcb = &l->l_addr->u_pcb;
129 	struct trapframe *tf;
130 
131 #if NNPX > 0
132 	/* If we were using the FPU, forget about it. */
133 	if (npxproc == l)
134 		npxdrop();
135 #endif
136 
137 #ifdef USER_LDT
138 	pmap_ldt_cleanup(l);
139 #endif
140 
141 	l->l_md.md_flags &= ~MDL_USEDFPU;
142 
143 	if (i386_use_fxsave) {
144 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
145 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
146 	} else
147 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
148 
149 	tf = l->l_md.md_regs;
150 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
151 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
152 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
153 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
154 	tf->tf_edi = 0;
155 	tf->tf_esi = 0;
156 	tf->tf_ebp = 0;
157 	tf->tf_ebx = (int)l->l_proc->p_psstr;
158 	tf->tf_edx = 0;
159 	tf->tf_ecx = 0;
160 	tf->tf_eax = 0;
161 	tf->tf_eip = epp->ep_entry;
162 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
163 	tf->tf_eflags = PSL_USERSET;
164 	tf->tf_esp = stack;
165 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
166 }
167 
168 /*
169  * Send an interrupt to process.
170  *
171  * Stack is set up to allow sigcode stored
172  * in u. to call routine, followed by kcall
173  * to sigreturn routine below.  After sigreturn
174  * resets the signal mask, the stack, and the
175  * frame pointer, it returns to the user
176  * specified pc, psl.
177  */
178 
179 void
180 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
181 {
182 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
183 		linux_rt_sendsig(ksi, mask);
184 	else
185 		linux_old_sendsig(ksi, mask);
186 }
187 
188 
189 static void
190 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
191 {
192 	uc->uc_flags = 0;
193 	uc->uc_link = NULL;
194 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
195 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
196 	native_to_linux_sigset(&uc->uc_sigmask, mask);
197 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
198 }
199 
200 static void
201 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
202 {
203 	/* Save register context. */
204 #ifdef VM86
205 	if (tf->tf_eflags & PSL_VM) {
206 		sc->sc_gs = tf->tf_vm86_gs;
207 		sc->sc_fs = tf->tf_vm86_fs;
208 		sc->sc_es = tf->tf_vm86_es;
209 		sc->sc_ds = tf->tf_vm86_ds;
210 		sc->sc_eflags = get_vflags(l);
211 	} else
212 #endif
213 	{
214 		sc->sc_gs = tf->tf_gs;
215 		sc->sc_fs = tf->tf_fs;
216 		sc->sc_es = tf->tf_es;
217 		sc->sc_ds = tf->tf_ds;
218 		sc->sc_eflags = tf->tf_eflags;
219 	}
220 	sc->sc_edi = tf->tf_edi;
221 	sc->sc_esi = tf->tf_esi;
222 	sc->sc_esp = tf->tf_esp;
223 	sc->sc_ebp = tf->tf_ebp;
224 	sc->sc_ebx = tf->tf_ebx;
225 	sc->sc_edx = tf->tf_edx;
226 	sc->sc_ecx = tf->tf_ecx;
227 	sc->sc_eax = tf->tf_eax;
228 	sc->sc_eip = tf->tf_eip;
229 	sc->sc_cs = tf->tf_cs;
230 	sc->sc_esp_at_signal = tf->tf_esp;
231 	sc->sc_ss = tf->tf_ss;
232 	sc->sc_err = tf->tf_err;
233 	sc->sc_trapno = tf->tf_trapno;
234 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
235 	sc->sc_387 = NULL;
236 
237 	/* Save signal stack. */
238 	/* Linux doesn't save the onstack flag in sigframe */
239 
240 	/* Save signal mask. */
241 	native_to_linux_old_sigset(&sc->sc_mask, mask);
242 }
243 
244 static void
245 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
246 {
247 	struct lwp *l = curlwp;
248 	struct proc *p = l->l_proc;
249 	struct trapframe *tf;
250 	struct linux_rt_sigframe *fp, frame;
251 	int onstack, error;
252 	linux_siginfo_t *lsi;
253 	int sig = ksi->ksi_signo;
254 	sig_t catcher = SIGACTION(p, sig).sa_handler;
255 	struct sigaltstack *sas = &l->l_sigstk;
256 
257 	tf = l->l_md.md_regs;
258 	/* Do we need to jump onto the signal stack? */
259 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
260 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
261 
262 
263 	/* Allocate space for the signal handler context. */
264 	if (onstack)
265 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
266 		    sas->ss_size);
267 	else
268 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
269 	fp--;
270 
271 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
272 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
273 
274 	/* Build stack frame for signal trampoline. */
275 	frame.sf_handler = catcher;
276 	frame.sf_sig = native_to_linux_signo[sig];
277 	frame.sf_sip = &fp->sf_si;
278 	frame.sf_ucp = &fp->sf_uc;
279 
280 	/*
281 	 * XXX: the following code assumes that the constants for
282 	 * siginfo are the same between linux and NetBSD.
283 	 */
284 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
285 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
286 	lsi->lsi_code = ksi->ksi_code;
287 	switch (lsi->lsi_signo = frame.sf_sig) {
288 	case LINUX_SIGILL:
289 	case LINUX_SIGFPE:
290 	case LINUX_SIGSEGV:
291 	case LINUX_SIGBUS:
292 	case LINUX_SIGTRAP:
293 		lsi->lsi_addr = ksi->ksi_addr;
294 		break;
295 	case LINUX_SIGCHLD:
296 		lsi->lsi_uid = ksi->ksi_uid;
297 		lsi->lsi_pid = ksi->ksi_pid;
298 		lsi->lsi_utime = ksi->ksi_utime;
299 		lsi->lsi_stime = ksi->ksi_stime;
300 		/* XXX is that right? */
301 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
302 		break;
303 	case LINUX_SIGIO:
304 		lsi->lsi_band = ksi->ksi_band;
305 		lsi->lsi_fd = ksi->ksi_fd;
306 		break;
307 	default:
308 		lsi->lsi_uid = ksi->ksi_uid;
309 		lsi->lsi_pid = ksi->ksi_pid;
310 		if (lsi->lsi_signo == LINUX_SIGALRM ||
311 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
312 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
313 		break;
314 	}
315 
316 	/* Save register context. */
317 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
318 	sendsig_reset(l, sig);
319 
320 	mutex_exit(p->p_lock);
321 	error = copyout(&frame, fp, sizeof(frame));
322 	mutex_enter(p->p_lock);
323 
324 	if (error != 0) {
325 		/*
326 		 * Process has trashed its stack; give it an illegal
327 		 * instruction to halt it in its tracks.
328 		 */
329 		sigexit(l, SIGILL);
330 		/* NOTREACHED */
331 	}
332 
333 	/*
334 	 * Build context to run handler in.
335 	 */
336 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
337 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
338 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
339 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
340 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
341 	    (linux_rt_sigcode - linux_sigcode);
342 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
343 	tf->tf_eflags &= ~PSL_CLEARSIG;
344 	tf->tf_esp = (int)fp;
345 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
346 
347 	/* Remember that we're now on the signal stack. */
348 	if (onstack)
349 		sas->ss_flags |= SS_ONSTACK;
350 }
351 
352 static void
353 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
354 {
355 	struct lwp *l = curlwp;
356 	struct proc *p = l->l_proc;
357 	struct trapframe *tf;
358 	struct linux_sigframe *fp, frame;
359 	int onstack, error;
360 	int sig = ksi->ksi_signo;
361 	sig_t catcher = SIGACTION(p, sig).sa_handler;
362 	struct sigaltstack *sas = &l->l_sigstk;
363 
364 	tf = l->l_md.md_regs;
365 
366 	/* Do we need to jump onto the signal stack? */
367 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
368 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
369 
370 	/* Allocate space for the signal handler context. */
371 	if (onstack)
372 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
373 		    sas->ss_size);
374 	else
375 		fp = (struct linux_sigframe *)tf->tf_esp;
376 	fp--;
377 
378 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
379 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
380 
381 	/* Build stack frame for signal trampoline. */
382 	frame.sf_handler = catcher;
383 	frame.sf_sig = native_to_linux_signo[sig];
384 
385 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
386 	sendsig_reset(l, sig);
387 
388 	mutex_exit(p->p_lock);
389 	error = copyout(&frame, fp, sizeof(frame));
390 	mutex_enter(p->p_lock);
391 
392 	if (error != 0) {
393 		/*
394 		 * Process has trashed its stack; give it an illegal
395 		 * instruction to halt it in its tracks.
396 		 */
397 		sigexit(l, SIGILL);
398 		/* NOTREACHED */
399 	}
400 
401 	/*
402 	 * Build context to run handler in.
403 	 */
404 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
405 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
406 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
407 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
408 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
409 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
410 	tf->tf_eflags &= ~PSL_CLEARSIG;
411 	tf->tf_esp = (int)fp;
412 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
413 
414 	/* Remember that we're now on the signal stack. */
415 	if (onstack)
416 		sas->ss_flags |= SS_ONSTACK;
417 }
418 
419 /*
420  * System call to cleanup state after a signal
421  * has been taken.  Reset signal mask and
422  * stack state from context left by sendsig (above).
423  * Return to previous pc and psl as specified by
424  * context left by sendsig. Check carefully to
425  * make sure that the user has not modified the
426  * psl to gain improper privileges or to cause
427  * a machine fault.
428  */
429 int
430 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
431 {
432 	/* {
433 		syscallarg(struct linux_ucontext *) ucp;
434 	} */
435 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
436 	int error;
437 
438 	/*
439 	 * The trampoline code hands us the context.
440 	 * It is unsafe to keep track of it ourselves, in the event that a
441 	 * program jumps out of a signal handler.
442 	 */
443 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
444 		return error;
445 
446 	/* XXX XAX we can do better here by using more of the ucontext */
447 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
448 }
449 
450 int
451 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
452 {
453 	/* {
454 		syscallarg(struct linux_sigcontext *) scp;
455 	} */
456 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
457 	int error;
458 
459 	/*
460 	 * The trampoline code hands us the context.
461 	 * It is unsafe to keep track of it ourselves, in the event that a
462 	 * program jumps out of a signal handler.
463 	 */
464 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
465 		return error;
466 	return linux_restore_sigcontext(l, &context, retval);
467 }
468 
469 static int
470 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
471     register_t *retval)
472 {
473 	struct proc *p = l->l_proc;
474 	struct sigaltstack *sas = &l->l_sigstk;
475 	struct trapframe *tf;
476 	sigset_t mask;
477 	ssize_t ss_gap;
478 	/* Restore register context. */
479 	tf = l->l_md.md_regs;
480 
481 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
482 #ifdef VM86
483 	if (scp->sc_eflags & PSL_VM) {
484 		void syscall_vm86(struct trapframe *);
485 
486 		tf->tf_vm86_gs = scp->sc_gs;
487 		tf->tf_vm86_fs = scp->sc_fs;
488 		tf->tf_vm86_es = scp->sc_es;
489 		tf->tf_vm86_ds = scp->sc_ds;
490 		set_vflags(l, scp->sc_eflags);
491 		p->p_md.md_syscall = syscall_vm86;
492 	} else
493 #endif
494 	{
495 		/*
496 		 * Check for security violations.  If we're returning to
497 		 * protected mode, the CPU will validate the segment registers
498 		 * automatically and generate a trap on violations.  We handle
499 		 * the trap, rather than doing all of the checking here.
500 		 */
501 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
502 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
503 			return EINVAL;
504 
505 		tf->tf_gs = scp->sc_gs;
506 		tf->tf_fs = scp->sc_fs;
507 		tf->tf_es = scp->sc_es;
508 		tf->tf_ds = scp->sc_ds;
509 #ifdef VM86
510 		if (tf->tf_eflags & PSL_VM)
511 			(*p->p_emul->e_syscall_intern)(p);
512 #endif
513 		tf->tf_eflags = scp->sc_eflags;
514 	}
515 	tf->tf_edi = scp->sc_edi;
516 	tf->tf_esi = scp->sc_esi;
517 	tf->tf_ebp = scp->sc_ebp;
518 	tf->tf_ebx = scp->sc_ebx;
519 	tf->tf_edx = scp->sc_edx;
520 	tf->tf_ecx = scp->sc_ecx;
521 	tf->tf_eax = scp->sc_eax;
522 	tf->tf_eip = scp->sc_eip;
523 	tf->tf_cs = scp->sc_cs;
524 	tf->tf_esp = scp->sc_esp_at_signal;
525 	tf->tf_ss = scp->sc_ss;
526 
527 	/* Restore signal stack. */
528 	/*
529 	 * Linux really does it this way; it doesn't have space in sigframe
530 	 * to save the onstack flag.
531 	 */
532 	mutex_enter(p->p_lock);
533 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
534 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
535 		sas->ss_flags |= SS_ONSTACK;
536 	else
537 		sas->ss_flags &= ~SS_ONSTACK;
538 
539 	/* Restore signal mask. */
540 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
541 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
542 	mutex_exit(p->p_lock);
543 
544 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
545 	return EJUSTRETURN;
546 }
547 
548 #ifdef USER_LDT
549 
550 static int
551 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
552     register_t *retval)
553 {
554 	struct x86_get_ldt_args gl;
555 	int error;
556 	int num_ldt;
557 	union descriptor *ldt_buf;
558 
559 	/*
560 	 * I've checked the linux code - this function is asymetric with
561 	 * linux_write_ldt, and returns raw ldt entries.
562 	 * NB, the code I saw zerod the spare parts of the user buffer.
563 	 */
564 
565 	DPRINTF(("linux_read_ldt!"));
566 
567 	num_ldt = x86_get_ldt_len(l);
568 	if (num_ldt <= 0)
569 		return EINVAL;
570 
571 	gl.start = 0;
572 	gl.desc = NULL;
573 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
574 
575 	if (gl.num > num_ldt)
576 		gl.num = num_ldt;
577 
578 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
579 
580 	error = x86_get_ldt1(l, &gl, ldt_buf);
581 	/* NB gl.num might have changed */
582 	if (error == 0) {
583 		*retval = gl.num * sizeof *ldt;
584 		error = copyout(ldt_buf, SCARG(uap, ptr),
585 		    gl.num * sizeof *ldt_buf);
586 	}
587 	free(ldt_buf, M_TEMP);
588 
589 	return error;
590 }
591 
/*
 * Argument block that linux_write_ldt() copies in from user space; the
 * caller's bytecount must equal sizeof(struct linux_ldt_info).
 * NOTE(review): presumably mirrors the Linux modify_ldt descriptor
 * structure, so the member order and widths must not change - confirm.
 */
struct linux_ldt_info {
	u_int entry_number;	/* LDT slot to write; must be < 8192 */
	u_long base_addr;	/* segment base */
	u_int limit;		/* segment limit (low 20 bits are used) */
	u_int seg_32bit:1;	/* stored in sd_def32 */
	u_int contents:2;	/* merged into sd_type; 3 is rejected in old mode */
	u_int read_exec_only:1;	/* when clear, bit 1 of sd_type is set */
	u_int limit_in_pages:1;	/* stored in sd_gran */
	u_int seg_not_present:1;	/* inverse of sd_p */
	u_int useable:1;	/* stored in sd_xx unless in old mode */
};
603 
604 static int
605 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
606     int oldmode)
607 {
608 	struct linux_ldt_info ldt_info;
609 	union descriptor d;
610 	struct x86_set_ldt_args sl;
611 	int error;
612 
613 	DPRINTF(("linux_write_ldt %d\n", oldmode));
614 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
615 		return (EINVAL);
616 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
617 		return error;
618 	if (ldt_info.entry_number >= 8192)
619 		return (EINVAL);
620 	if (ldt_info.contents == 3) {
621 		if (oldmode)
622 			return (EINVAL);
623 		if (ldt_info.seg_not_present)
624 			return (EINVAL);
625 	}
626 
627 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
628 	    (oldmode || (ldt_info.contents == 0 &&
629 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
630 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
631 	    ldt_info.useable == 0))) {
632 		/* this means you should zero the ldt */
633 		(void)memset(&d, 0, sizeof(d));
634 	} else {
635 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
636 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
637 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
638 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
639 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
640 		    (!ldt_info.read_exec_only << 1);
641 		d.sd.sd_dpl = SEL_UPL;
642 		d.sd.sd_p = !ldt_info.seg_not_present;
643 		d.sd.sd_def32 = ldt_info.seg_32bit;
644 		d.sd.sd_gran = ldt_info.limit_in_pages;
645 		if (!oldmode)
646 			d.sd.sd_xx = ldt_info.useable;
647 		else
648 			d.sd.sd_xx = 0;
649 	}
650 	sl.start = ldt_info.entry_number;
651 	sl.desc = NULL;;
652 	sl.num = 1;
653 
654 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
655 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
656 
657 	return x86_set_ldt1(l, &sl, &d);
658 }
659 
660 #endif /* USER_LDT */
661 
662 int
663 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
664 {
665 	/* {
666 		syscallarg(int) func;
667 		syscallarg(void *) ptr;
668 		syscallarg(size_t) bytecount;
669 	} */
670 
671 	switch (SCARG(uap, func)) {
672 #ifdef USER_LDT
673 	case 0:
674 		return linux_read_ldt(l, (const void *)uap, retval);
675 	case 1:
676 		return linux_write_ldt(l, (const void *)uap, 1);
677 	case 2:
678 #ifdef notyet
679 		return (linux_read_default_ldt(l, (const void *)uap, retval);
680 #else
681 		return (ENOSYS);
682 #endif
683 	case 0x11:
684 		return linux_write_ldt(l, (const void *)uap, 0);
685 #endif /* USER_LDT */
686 
687 	default:
688 		return (ENOSYS);
689 	}
690 }
691 
692 /*
693  * XXX Pathetic hack to make svgalib work. This will fake the major
694  * device number of an opened VT so that svgalib likes it. grmbl.
695  * Should probably do it 'wrong the right way' and use a mapping
696  * array for all major device numbers, and map linux_mknod too.
697  */
698 dev_t
699 linux_fakedev(dev_t dev, int raw)
700 {
701 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
702 	const struct cdevsw *cd = cdevsw_lookup(dev);
703 
704 	if (raw) {
705 #if (NWSDISPLAY > 0)
706 		extern const struct cdevsw wsdisplay_cdevsw;
707 		if (cd == &wsdisplay_cdevsw)
708 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
709 #endif
710 	}
711 
712 	if (cd == &ptc_cdevsw)
713 		return makedev(LINUX_PTC_MAJOR, minor(dev));
714 	if (cd == &pts_cdevsw)
715 		return makedev(LINUX_PTS_MAJOR, minor(dev));
716 
717 	return dev;
718 }
719 
#if (NWSDISPLAY > 0)
/*
 * That's not complete, but enough to get an X server running.
 *
 * Fixed key tables handed out by the LINUX_KDGKBENT ioctl: the handler
 * returns linux_keytabs[kb_table][kb_index] after range-checking both
 * indices.  Each table holds NR_KEYS entries.
 * NOTE(review): the 16-bit values look like Linux keymap entries (type in
 * the high byte, value in the low byte) - confirm against the Linux
 * keyboard headers.
 */
#define NR_KEYS 128
/* Table 0: no modifier (going by the name). */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {	/* table 1 */
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {	/* tables 2 and 3 */
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {	/* table 4 */
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table selector for LINUX_KDGKBENT, indexed by kb_table.  altgr_map is
 * listed twice, so indices 2 and 3 return the same table.
 * NOTE(review): presumably the index is the Linux modifier combination
 * (0 plain, 1 shift, 2 altgr, 3 shift+altgr, 4 ctrl) - confirm.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
#endif
799 
800 static struct biosdisk_info *
801 fd2biosinfo(struct proc *p, struct file *fp)
802 {
803 	struct vnode *vp;
804 	const char *blkname;
805 	char diskname[16];
806 	int i;
807 	struct nativedisk_info *nip;
808 	struct disklist *dl = x86_alldisks;
809 
810 	if (fp->f_type != DTYPE_VNODE)
811 		return NULL;
812 	vp = (struct vnode *)fp->f_data;
813 
814 	if (vp->v_type != VBLK)
815 		return NULL;
816 
817 	blkname = devsw_blk2name(major(vp->v_rdev));
818 	snprintf(diskname, sizeof diskname, "%s%llu", blkname,
819 	    (unsigned long long)DISKUNIT(vp->v_rdev));
820 
821 	for (i = 0; i < dl->dl_nnativedisks; i++) {
822 		nip = &dl->dl_nativedisks[i];
823 		if (strcmp(diskname, nip->ni_devname))
824 			continue;
825 		if (nip->ni_nmatches != 0)
826 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
827 	}
828 
829 	return NULL;
830 }
831 
832 
833 /*
834  * We come here in a last attempt to satisfy a Linux ioctl() call
835  */
836 int
837 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
838 {
839 	/* {
840 		syscallarg(int) fd;
841 		syscallarg(u_long) com;
842 		syscallarg(void *) data;
843 	} */
844 	struct sys_ioctl_args bia;
845 	u_long com;
846 	int error, error1;
847 #if (NWSDISPLAY > 0)
848 	struct vt_mode lvt;
849 	struct kbentry kbe;
850 #endif
851 	struct linux_hd_geometry hdg;
852 	struct linux_hd_big_geometry hdg_big;
853 	struct biosdisk_info *bip;
854 	file_t *fp;
855 	int fd;
856 	struct disklabel label, *labp;
857 	struct partinfo partp;
858 	int (*ioctlf)(struct file *, u_long, void *);
859 	u_long start, biostotal, realtotal;
860 	u_char heads, sectors;
861 	u_int cylinders;
862 	struct ioctl_pt pt;
863 
864 	fd = SCARG(uap, fd);
865 	SCARG(&bia, fd) = fd;
866 	SCARG(&bia, data) = SCARG(uap, data);
867 	com = SCARG(uap, com);
868 
869 	if ((fp = fd_getfile(fd)) == NULL)
870 		return (EBADF);
871 
872 	switch (com) {
873 #if (NWSDISPLAY > 0)
874 	case LINUX_KDGKBMODE:
875 		com = KDGKBMODE;
876 		break;
877 	case LINUX_KDSKBMODE:
878 		com = KDSKBMODE;
879 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
880 			SCARG(&bia, data) = (void *)K_RAW;
881 		break;
882 	case LINUX_KIOCSOUND:
883 		SCARG(&bia, data) =
884 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
885 		/* fall through */
886 	case LINUX_KDMKTONE:
887 		com = KDMKTONE;
888 		break;
889 	case LINUX_KDSETMODE:
890 		com = KDSETMODE;
891 		break;
892 	case LINUX_KDGETMODE:
893 		/* KD_* values are equal to the wscons numbers */
894 		com = WSDISPLAYIO_GMODE;
895 		break;
896 	case LINUX_KDENABIO:
897 		com = KDENABIO;
898 		break;
899 	case LINUX_KDDISABIO:
900 		com = KDDISABIO;
901 		break;
902 	case LINUX_KDGETLED:
903 		com = KDGETLED;
904 		break;
905 	case LINUX_KDSETLED:
906 		com = KDSETLED;
907 		break;
908 	case LINUX_VT_OPENQRY:
909 		com = VT_OPENQRY;
910 		break;
911 	case LINUX_VT_GETMODE:
912 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
913 		if (error != 0)
914 			goto out;
915 		lvt.relsig = native_to_linux_signo[lvt.relsig];
916 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
917 		lvt.frsig = native_to_linux_signo[lvt.frsig];
918 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
919 		goto out;
920 	case LINUX_VT_SETMODE:
921 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
922 		if (error != 0)
923 			goto out;
924 		lvt.relsig = linux_to_native_signo[lvt.relsig];
925 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
926 		lvt.frsig = linux_to_native_signo[lvt.frsig];
927 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
928 		goto out;
929 	case LINUX_VT_DISALLOCATE:
930 		/* XXX should use WSDISPLAYIO_DELSCREEN */
931 		error = 0;
932 		goto out;
933 	case LINUX_VT_RELDISP:
934 		com = VT_RELDISP;
935 		break;
936 	case LINUX_VT_ACTIVATE:
937 		com = VT_ACTIVATE;
938 		break;
939 	case LINUX_VT_WAITACTIVE:
940 		com = VT_WAITACTIVE;
941 		break;
942 	case LINUX_VT_GETSTATE:
943 		com = VT_GETSTATE;
944 		break;
945 	case LINUX_KDGKBTYPE:
946 	    {
947 		static const u_int8_t kb101 = KB_101;
948 
949 		/* This is what Linux does. */
950 		error = copyout(&kb101, SCARG(uap, data), 1);
951 		goto out;
952 	    }
953 	case LINUX_KDGKBENT:
954 		/*
955 		 * The Linux KDGKBENT ioctl is different from the
956 		 * SYSV original. So we handle it in machdep code.
957 		 * XXX We should use keyboard mapping information
958 		 * from wsdisplay, but this would be expensive.
959 		 */
960 		if ((error = copyin(SCARG(uap, data), &kbe,
961 				    sizeof(struct kbentry))))
962 			goto out;
963 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
964 		    || kbe.kb_index >= NR_KEYS) {
965 			error = EINVAL;
966 			goto out;
967 		}
968 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
969 		error = copyout(&kbe, SCARG(uap, data),
970 				sizeof(struct kbentry));
971 		goto out;
972 #endif
973 	case LINUX_HDIO_GETGEO:
974 	case LINUX_HDIO_GETGEO_BIG:
975 		/*
976 		 * Try to mimic Linux behaviour: return the BIOS geometry
977 		 * if possible (extending its # of cylinders if it's beyond
978 		 * the 1023 limit), fall back to the MI geometry (i.e.
979 		 * the real geometry) if not found, by returning an
980 		 * error. See common/linux_hdio.c
981 		 */
982 		bip = fd2biosinfo(curproc, fp);
983 		ioctlf = fp->f_ops->fo_ioctl;
984 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
985 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
986 		if (error != 0 && error1 != 0) {
987 			error = error1;
988 			goto out;
989 		}
990 		labp = error != 0 ? &label : partp.disklab;
991 		start = error1 != 0 ? partp.part->p_offset : 0;
992 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
993 		    && bip->bi_cyl != 0) {
994 			heads = bip->bi_head;
995 			sectors = bip->bi_sec;
996 			cylinders = bip->bi_cyl;
997 			biostotal = heads * sectors * cylinders;
998 			realtotal = labp->d_ntracks * labp->d_nsectors *
999 			    labp->d_ncylinders;
1000 			if (realtotal > biostotal)
1001 				cylinders = realtotal / (heads * sectors);
1002 		} else {
1003 			heads = labp->d_ntracks;
1004 			cylinders = labp->d_ncylinders;
1005 			sectors = labp->d_nsectors;
1006 		}
1007 		if (com == LINUX_HDIO_GETGEO) {
1008 			hdg.start = start;
1009 			hdg.heads = heads;
1010 			hdg.cylinders = cylinders;
1011 			hdg.sectors = sectors;
1012 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1013 			goto out;
1014 		} else {
1015 			hdg_big.start = start;
1016 			hdg_big.heads = heads;
1017 			hdg_big.cylinders = cylinders;
1018 			hdg_big.sectors = sectors;
1019 			error = copyout(&hdg_big, SCARG(uap, data),
1020 			    sizeof hdg_big);
1021 			goto out;
1022 		}
1023 
1024 	default:
1025 		/*
1026 		 * Unknown to us. If it's on a device, just pass it through
1027 		 * using PTIOCLINUX, the device itself might be able to
1028 		 * make some sense of it.
1029 		 * XXX hack: if the function returns EJUSTRETURN,
1030 		 * it has stuffed a sysctl return value in pt.data.
1031 		 */
1032 		ioctlf = fp->f_ops->fo_ioctl;
1033 		pt.com = SCARG(uap, com);
1034 		pt.data = SCARG(uap, data);
1035 		error = ioctlf(fp, PTIOCLINUX, &pt);
1036 		if (error == EJUSTRETURN) {
1037 			retval[0] = (register_t)pt.data;
1038 			error = 0;
1039 		}
1040 
1041 		if (error == ENOTTY) {
1042 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1043 			    com));
1044 		}
1045 		goto out;
1046 	}
1047 	SCARG(&bia, com) = com;
1048 	error = sys_ioctl(curlwp, &bia, retval);
1049 out:
1050 	fd_putfile(fd);
1051 	return error;
1052 }
1053 
1054 /*
1055  * Set I/O permissions for a process. Just set the maximum level
1056  * right away (ignoring the argument), otherwise we would have
1057  * to rely on I/O permission maps, which are not implemented.
1058  */
1059 int
1060 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1061 {
1062 	/* {
1063 		syscallarg(int) level;
1064 	} */
1065 	struct trapframe *fp = l->l_md.md_regs;
1066 
1067 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1068 	    NULL, NULL, NULL, NULL) != 0)
1069 		return EPERM;
1070 	fp->tf_eflags |= PSL_IOPL;
1071 	*retval = 0;
1072 	return 0;
1073 }
1074 
1075 /*
1076  * See above. If a root process tries to set access to an I/O port,
1077  * just let it have the whole range.
1078  */
1079 int
1080 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1081 {
1082 	/* {
1083 		syscallarg(unsigned int) lo;
1084 		syscallarg(unsigned int) hi;
1085 		syscallarg(int) val;
1086 	} */
1087 	struct trapframe *fp = l->l_md.md_regs;
1088 
1089 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1090 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1091 	    NULL, NULL) != 0)
1092 		return EPERM;
1093 	if (SCARG(uap, val))
1094 		fp->tf_eflags |= PSL_IOPL;
1095 	*retval = 0;
1096 	return 0;
1097 }
1098 
1099 int
1100 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1101     void *arg)
1102 {
1103 	return 0;
1104 }
1105 
1106 const char *
1107 linux_get_uname_arch(void)
1108 {
1109 	static char uname_arch[5] = "i386";
1110 
1111 	if (uname_arch[1] == '3')
1112 		uname_arch[1] += cpu_class;
1113 	return uname_arch;
1114 }
1115 
1116 #ifdef LINUX_NPTL
1117 void *
1118 linux_get_newtls(struct lwp *l)
1119 {
1120 #if 0
1121 	struct trapframe *tf = l->l_md.md_regs;
1122 #endif
1123 
1124 	/* XXX: Implement me */
1125 	return NULL;
1126 }
1127 
/*
 * Install `tls' as the TLS pointer of lwp `l'.
 *
 * XXX: Implement me.  Currently a stub that ignores both arguments and
 * always reports success (0).
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{
	const int rv = 0;

	return rv;
}
1134 #endif
1135