xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 267197ec1eebfcb9810ea27a89625b6ddf68e3e7)
1 /*	$NetBSD: linux_machdep.c,v 1.133 2007/12/20 23:02:52 dsl Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.133 2007/12/20 23:02:52 dsl Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/syscallargs.h>
66 #include <sys/filedesc.h>
67 #include <sys/exec_elf.h>
68 #include <sys/disklabel.h>
69 #include <sys/ioctl.h>
70 #include <sys/wait.h>
71 #include <sys/kauth.h>
72 
73 #include <miscfs/specfs/specdev.h>
74 
75 #include <compat/linux/common/linux_types.h>
76 #include <compat/linux/common/linux_signal.h>
77 #include <compat/linux/common/linux_util.h>
78 #include <compat/linux/common/linux_ioctl.h>
79 #include <compat/linux/common/linux_hdio.h>
80 #include <compat/linux/common/linux_exec.h>
81 #include <compat/linux/common/linux_machdep.h>
82 #include <compat/linux/common/linux_errno.h>
83 
84 #include <compat/linux/linux_syscallargs.h>
85 
86 #include <sys/cpu.h>
87 #include <machine/cpufunc.h>
88 #include <machine/psl.h>
89 #include <machine/reg.h>
90 #include <machine/segments.h>
91 #include <machine/specialreg.h>
92 #include <machine/sysarch.h>
93 #include <machine/vm86.h>
94 #include <machine/vmparam.h>
95 
96 /*
97  * To see whether wscons is configured (for virtual console ioctl calls).
98  */
99 #if defined(_KERNEL_OPT)
100 #include "wsdisplay.h"
101 #endif
102 #if (NWSDISPLAY > 0)
103 #include <dev/wscons/wsconsio.h>
104 #include <dev/wscons/wsdisplay_usl_io.h>
105 #if defined(_KERNEL_OPT)
106 #include "opt_xserver.h"
107 #endif
108 #endif
109 
110 #ifdef DEBUG_LINUX
111 #define DPRINTF(a) uprintf a
112 #else
113 #define DPRINTF(a)
114 #endif
115 
116 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
117 extern struct disklist *x86_alldisks;
118 static void linux_save_ucontext(struct lwp *, struct trapframe *,
119     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
120 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
121     const sigset_t *, struct linux_sigcontext *);
122 static int linux_restore_sigcontext(struct lwp *,
123     struct linux_sigcontext *, register_t *);
124 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
125 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
126 
127 extern char linux_sigcode[], linux_rt_sigcode[];
128 /*
129  * Deal with some i386-specific things in the Linux emulation code.
130  */
131 
132 void
133 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
134 {
135 	struct pcb *pcb = &l->l_addr->u_pcb;
136 	struct trapframe *tf;
137 
138 #if NNPX > 0
139 	/* If we were using the FPU, forget about it. */
140 	if (npxproc == l)
141 		npxdrop();
142 #endif
143 
144 #ifdef USER_LDT
145 	pmap_ldt_cleanup(l);
146 #endif
147 
148 	l->l_md.md_flags &= ~MDL_USEDFPU;
149 
150 	if (i386_use_fxsave) {
151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
152 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
153 	} else
154 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
155 
156 	tf = l->l_md.md_regs;
157 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
158 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
159 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
160 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
161 	tf->tf_edi = 0;
162 	tf->tf_esi = 0;
163 	tf->tf_ebp = 0;
164 	tf->tf_ebx = (int)l->l_proc->p_psstr;
165 	tf->tf_edx = 0;
166 	tf->tf_ecx = 0;
167 	tf->tf_eax = 0;
168 	tf->tf_eip = epp->ep_entry;
169 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
170 	tf->tf_eflags = PSL_USERSET;
171 	tf->tf_esp = stack;
172 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
173 }
174 
175 /*
176  * Send an interrupt to process.
177  *
178  * Stack is set up to allow sigcode stored
179  * in u. to call routine, followed by kcall
180  * to sigreturn routine below.  After sigreturn
181  * resets the signal mask, the stack, and the
182  * frame pointer, it returns to the user
183  * specified pc, psl.
184  */
185 
186 void
187 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
188 {
189 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
190 		linux_rt_sendsig(ksi, mask);
191 	else
192 		linux_old_sendsig(ksi, mask);
193 }
194 
195 
196 static void
197 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
198 {
199 	uc->uc_flags = 0;
200 	uc->uc_link = NULL;
201 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
202 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
203 	native_to_linux_sigset(&uc->uc_sigmask, mask);
204 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
205 }
206 
207 static void
208 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
209 {
210 	/* Save register context. */
211 #ifdef VM86
212 	if (tf->tf_eflags & PSL_VM) {
213 		sc->sc_gs = tf->tf_vm86_gs;
214 		sc->sc_fs = tf->tf_vm86_fs;
215 		sc->sc_es = tf->tf_vm86_es;
216 		sc->sc_ds = tf->tf_vm86_ds;
217 		sc->sc_eflags = get_vflags(l);
218 	} else
219 #endif
220 	{
221 		sc->sc_gs = tf->tf_gs;
222 		sc->sc_fs = tf->tf_fs;
223 		sc->sc_es = tf->tf_es;
224 		sc->sc_ds = tf->tf_ds;
225 		sc->sc_eflags = tf->tf_eflags;
226 	}
227 	sc->sc_edi = tf->tf_edi;
228 	sc->sc_esi = tf->tf_esi;
229 	sc->sc_esp = tf->tf_esp;
230 	sc->sc_ebp = tf->tf_ebp;
231 	sc->sc_ebx = tf->tf_ebx;
232 	sc->sc_edx = tf->tf_edx;
233 	sc->sc_ecx = tf->tf_ecx;
234 	sc->sc_eax = tf->tf_eax;
235 	sc->sc_eip = tf->tf_eip;
236 	sc->sc_cs = tf->tf_cs;
237 	sc->sc_esp_at_signal = tf->tf_esp;
238 	sc->sc_ss = tf->tf_ss;
239 	sc->sc_err = tf->tf_err;
240 	sc->sc_trapno = tf->tf_trapno;
241 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
242 	sc->sc_387 = NULL;
243 
244 	/* Save signal stack. */
245 	/* Linux doesn't save the onstack flag in sigframe */
246 
247 	/* Save signal mask. */
248 	native_to_linux_old_sigset(&sc->sc_mask, mask);
249 }
250 
251 static void
252 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
253 {
254 	struct lwp *l = curlwp;
255 	struct proc *p = l->l_proc;
256 	struct trapframe *tf;
257 	struct linux_rt_sigframe *fp, frame;
258 	int onstack, error;
259 	linux_siginfo_t *lsi;
260 	int sig = ksi->ksi_signo;
261 	sig_t catcher = SIGACTION(p, sig).sa_handler;
262 	struct sigaltstack *sas = &l->l_sigstk;
263 
264 	tf = l->l_md.md_regs;
265 	/* Do we need to jump onto the signal stack? */
266 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
267 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
268 
269 
270 	/* Allocate space for the signal handler context. */
271 	if (onstack)
272 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
273 		    sas->ss_size);
274 	else
275 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
276 	fp--;
277 
278 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
279 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
280 
281 	/* Build stack frame for signal trampoline. */
282 	frame.sf_handler = catcher;
283 	frame.sf_sig = native_to_linux_signo[sig];
284 	frame.sf_sip = &fp->sf_si;
285 	frame.sf_ucp = &fp->sf_uc;
286 
287 	/*
288 	 * XXX: the following code assumes that the constants for
289 	 * siginfo are the same between linux and NetBSD.
290 	 */
291 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
292 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
293 	lsi->lsi_code = ksi->ksi_code;
294 	switch (lsi->lsi_signo = frame.sf_sig) {
295 	case LINUX_SIGILL:
296 	case LINUX_SIGFPE:
297 	case LINUX_SIGSEGV:
298 	case LINUX_SIGBUS:
299 	case LINUX_SIGTRAP:
300 		lsi->lsi_addr = ksi->ksi_addr;
301 		break;
302 	case LINUX_SIGCHLD:
303 		lsi->lsi_uid = ksi->ksi_uid;
304 		lsi->lsi_pid = ksi->ksi_pid;
305 		lsi->lsi_utime = ksi->ksi_utime;
306 		lsi->lsi_stime = ksi->ksi_stime;
307 
308 		/* We use the same codes */
309 		lsi->lsi_code = ksi->ksi_code;
310 		/* XXX is that right? */
311 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
312 		break;
313 	case LINUX_SIGIO:
314 		lsi->lsi_band = ksi->ksi_band;
315 		lsi->lsi_fd = ksi->ksi_fd;
316 		break;
317 	default:
318 		lsi->lsi_uid = ksi->ksi_uid;
319 		lsi->lsi_pid = ksi->ksi_pid;
320 		if (lsi->lsi_signo == LINUX_SIGALRM ||
321 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
322 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
323 		break;
324 	}
325 
326 	/* Save register context. */
327 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
328 	sendsig_reset(l, sig);
329 
330 	mutex_exit(&p->p_smutex);
331 	error = copyout(&frame, fp, sizeof(frame));
332 	mutex_enter(&p->p_smutex);
333 
334 	if (error != 0) {
335 		/*
336 		 * Process has trashed its stack; give it an illegal
337 		 * instruction to halt it in its tracks.
338 		 */
339 		sigexit(l, SIGILL);
340 		/* NOTREACHED */
341 	}
342 
343 	/*
344 	 * Build context to run handler in.
345 	 */
346 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
347 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
348 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
349 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
350 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
351 	    (linux_rt_sigcode - linux_sigcode);
352 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
353 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
354 	tf->tf_esp = (int)fp;
355 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
356 
357 	/* Remember that we're now on the signal stack. */
358 	if (onstack)
359 		sas->ss_flags |= SS_ONSTACK;
360 }
361 
362 static void
363 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
364 {
365 	struct lwp *l = curlwp;
366 	struct proc *p = l->l_proc;
367 	struct trapframe *tf;
368 	struct linux_sigframe *fp, frame;
369 	int onstack, error;
370 	int sig = ksi->ksi_signo;
371 	sig_t catcher = SIGACTION(p, sig).sa_handler;
372 	struct sigaltstack *sas = &l->l_sigstk;
373 
374 	tf = l->l_md.md_regs;
375 
376 	/* Do we need to jump onto the signal stack? */
377 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
378 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
379 
380 	/* Allocate space for the signal handler context. */
381 	if (onstack)
382 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
383 		    sas->ss_size);
384 	else
385 		fp = (struct linux_sigframe *)tf->tf_esp;
386 	fp--;
387 
388 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
389 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
390 
391 	/* Build stack frame for signal trampoline. */
392 	frame.sf_handler = catcher;
393 	frame.sf_sig = native_to_linux_signo[sig];
394 
395 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
396 	sendsig_reset(l, sig);
397 
398 	mutex_exit(&p->p_smutex);
399 	error = copyout(&frame, fp, sizeof(frame));
400 	mutex_enter(&p->p_smutex);
401 
402 	if (error != 0) {
403 		/*
404 		 * Process has trashed its stack; give it an illegal
405 		 * instruction to halt it in its tracks.
406 		 */
407 		sigexit(l, SIGILL);
408 		/* NOTREACHED */
409 	}
410 
411 	/*
412 	 * Build context to run handler in.
413 	 */
414 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
415 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
416 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
417 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
418 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
419 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
420 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
421 	tf->tf_esp = (int)fp;
422 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
423 
424 	/* Remember that we're now on the signal stack. */
425 	if (onstack)
426 		sas->ss_flags |= SS_ONSTACK;
427 }
428 
429 /*
430  * System call to cleanup state after a signal
431  * has been taken.  Reset signal mask and
432  * stack state from context left by sendsig (above).
433  * Return to previous pc and psl as specified by
434  * context left by sendsig. Check carefully to
435  * make sure that the user has not modified the
436  * psl to gain improper privileges or to cause
437  * a machine fault.
438  */
439 int
440 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
441 {
442 	/* {
443 		syscallarg(struct linux_ucontext *) ucp;
444 	} */
445 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
446 	int error;
447 
448 	/*
449 	 * The trampoline code hands us the context.
450 	 * It is unsafe to keep track of it ourselves, in the event that a
451 	 * program jumps out of a signal handler.
452 	 */
453 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
454 		return error;
455 
456 	/* XXX XAX we can do better here by using more of the ucontext */
457 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
458 }
459 
460 int
461 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
462 {
463 	/* {
464 		syscallarg(struct linux_sigcontext *) scp;
465 	} */
466 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
467 	int error;
468 
469 	/*
470 	 * The trampoline code hands us the context.
471 	 * It is unsafe to keep track of it ourselves, in the event that a
472 	 * program jumps out of a signal handler.
473 	 */
474 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
475 		return error;
476 	return linux_restore_sigcontext(l, &context, retval);
477 }
478 
479 static int
480 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
481     register_t *retval)
482 {
483 	struct proc *p = l->l_proc;
484 	struct sigaltstack *sas = &l->l_sigstk;
485 	struct trapframe *tf;
486 	sigset_t mask;
487 	ssize_t ss_gap;
488 	/* Restore register context. */
489 	tf = l->l_md.md_regs;
490 
491 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
492 #ifdef VM86
493 	if (scp->sc_eflags & PSL_VM) {
494 		void syscall_vm86(struct trapframe *);
495 
496 		tf->tf_vm86_gs = scp->sc_gs;
497 		tf->tf_vm86_fs = scp->sc_fs;
498 		tf->tf_vm86_es = scp->sc_es;
499 		tf->tf_vm86_ds = scp->sc_ds;
500 		set_vflags(l, scp->sc_eflags);
501 		p->p_md.md_syscall = syscall_vm86;
502 	} else
503 #endif
504 	{
505 		/*
506 		 * Check for security violations.  If we're returning to
507 		 * protected mode, the CPU will validate the segment registers
508 		 * automatically and generate a trap on violations.  We handle
509 		 * the trap, rather than doing all of the checking here.
510 		 */
511 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
512 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
513 			return EINVAL;
514 
515 		tf->tf_gs = scp->sc_gs;
516 		tf->tf_fs = scp->sc_fs;
517 		tf->tf_es = scp->sc_es;
518 		tf->tf_ds = scp->sc_ds;
519 #ifdef VM86
520 		if (tf->tf_eflags & PSL_VM)
521 			(*p->p_emul->e_syscall_intern)(p);
522 #endif
523 		tf->tf_eflags = scp->sc_eflags;
524 	}
525 	tf->tf_edi = scp->sc_edi;
526 	tf->tf_esi = scp->sc_esi;
527 	tf->tf_ebp = scp->sc_ebp;
528 	tf->tf_ebx = scp->sc_ebx;
529 	tf->tf_edx = scp->sc_edx;
530 	tf->tf_ecx = scp->sc_ecx;
531 	tf->tf_eax = scp->sc_eax;
532 	tf->tf_eip = scp->sc_eip;
533 	tf->tf_cs = scp->sc_cs;
534 	tf->tf_esp = scp->sc_esp_at_signal;
535 	tf->tf_ss = scp->sc_ss;
536 
537 	/* Restore signal stack. */
538 	/*
539 	 * Linux really does it this way; it doesn't have space in sigframe
540 	 * to save the onstack flag.
541 	 */
542 	mutex_enter(&p->p_smutex);
543 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
544 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
545 		sas->ss_flags |= SS_ONSTACK;
546 	else
547 		sas->ss_flags &= ~SS_ONSTACK;
548 
549 	/* Restore signal mask. */
550 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
551 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
552 	mutex_exit(&p->p_smutex);
553 
554 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
555 	return EJUSTRETURN;
556 }
557 
558 #ifdef USER_LDT
559 
560 static int
561 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
562     register_t *retval)
563 {
564 	struct x86_get_ldt_args gl;
565 	int error;
566 	int num_ldt;
567 	union descriptor *ldt_buf;
568 
569 	/*
570 	 * I've checked the linux code - this function is asymetric with
571 	 * linux_write_ldt, and returns raw ldt entries.
572 	 * NB, the code I saw zerod the spare parts of the user buffer.
573 	 */
574 
575 	DPRINTF(("linux_read_ldt!"));
576 
577 	num_ldt = x86_get_ldt_len(l);
578 	if (num_ldt <= 0)
579 		return EINVAL;
580 
581 	gl.start = 0;
582 	gl.desc = NULL;
583 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
584 
585 	if (gl.num > num_ldt)
586 		gl.num = num_ldt;
587 
588 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
589 
590 	error = x86_get_ldt1(l, &gl, ldt_buf);
591 	/* NB gl.num might have changed */
592 	if (error == 0) {
593 		*retval = gl.num * sizeof *ldt;
594 		error = copyout(ldt_buf, SCARG(uap, ptr),
595 		    gl.num * sizeof *ldt_buf);
596 	}
597 	free(ldt, M_TEMP);
598 
599 	return error;
600 }
601 
602 struct linux_ldt_info {
603 	u_int entry_number;
604 	u_long base_addr;
605 	u_int limit;
606 	u_int seg_32bit:1;
607 	u_int contents:2;
608 	u_int read_exec_only:1;
609 	u_int limit_in_pages:1;
610 	u_int seg_not_present:1;
611 	u_int useable:1;
612 };
613 
614 static int
615 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
616     int oldmode)
617 {
618 	struct linux_ldt_info ldt_info;
619 	union descriptor d;
620 	struct x86_set_ldt_args sl;
621 	int error;
622 
623 	DPRINTF(("linux_write_ldt %d\n", oldmode));
624 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
625 		return (EINVAL);
626 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
627 		return error;
628 	if (ldt_info.entry_number >= 8192)
629 		return (EINVAL);
630 	if (ldt_info.contents == 3) {
631 		if (oldmode)
632 			return (EINVAL);
633 		if (ldt_info.seg_not_present)
634 			return (EINVAL);
635 	}
636 
637 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
638 	    (oldmode || (ldt_info.contents == 0 &&
639 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
640 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
641 	    ldt_info.useable == 0))) {
642 		/* this means you should zero the ldt */
643 		(void)memset(&d, 0, sizeof(d));
644 	} else {
645 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
646 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
647 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
648 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
649 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
650 		    (!ldt_info.read_exec_only << 1);
651 		d.sd.sd_dpl = SEL_UPL;
652 		d.sd.sd_p = !ldt_info.seg_not_present;
653 		d.sd.sd_def32 = ldt_info.seg_32bit;
654 		d.sd.sd_gran = ldt_info.limit_in_pages;
655 		if (!oldmode)
656 			d.sd.sd_xx = ldt_info.useable;
657 		else
658 			d.sd.sd_xx = 0;
659 	}
660 	sl.start = ldt_info.entry_number;
661 	sl.desc = NULL;;
662 	sl.num = 1;
663 
664 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
665 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
666 
667 	return x86_set_ldt1(l, &sl, &d);
668 }
669 
670 #endif /* USER_LDT */
671 
672 int
673 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
674 {
675 	/* {
676 		syscallarg(int) func;
677 		syscallarg(void *) ptr;
678 		syscallarg(size_t) bytecount;
679 	} */
680 
681 	switch (SCARG(uap, func)) {
682 #ifdef USER_LDT
683 	case 0:
684 		return linux_read_ldt(l, (const void *)uap, retval);
685 	case 1:
686 		return linux_write_ldt(l, (const void *)uap, 1);
687 	case 2:
688 #ifdef notyet
689 		return (linux_read_default_ldt(l, (const void *)uap, retval);
690 #else
691 		return (ENOSYS);
692 #endif
693 	case 0x11:
694 		return linux_write_ldt(l, (const void *)uap, 0);
695 #endif /* USER_LDT */
696 
697 	default:
698 		return (ENOSYS);
699 	}
700 }
701 
702 /*
703  * XXX Pathetic hack to make svgalib work. This will fake the major
704  * device number of an opened VT so that svgalib likes it. grmbl.
705  * Should probably do it 'wrong the right way' and use a mapping
706  * array for all major device numbers, and map linux_mknod too.
707  */
708 dev_t
709 linux_fakedev(dev_t dev, int raw)
710 {
711 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
712 	const struct cdevsw *cd = cdevsw_lookup(dev);
713 
714 	if (raw) {
715 #if (NWSDISPLAY > 0)
716 		extern const struct cdevsw wsdisplay_cdevsw;
717 		if (cd == &wsdisplay_cdevsw)
718 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
719 #endif
720 	}
721 
722 	if (cd == &ptc_cdevsw)
723 		return makedev(LINUX_PTC_MAJOR, minor(dev));
724 	if (cd == &pts_cdevsw)
725 		return makedev(LINUX_PTS_MAJOR, minor(dev));
726 
727 	return dev;
728 }
729 
730 #if (NWSDISPLAY > 0)
731 /*
732  * That's not complete, but enough to get an X server running.
733  */
734 #define NR_KEYS 128
735 static const u_short plain_map[NR_KEYS] = {
736 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
737 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
738 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
739 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
740 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
741 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
742 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
743 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
744 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
745 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
746 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
747 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
748 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
749 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
750 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
751 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
752 }, shift_map[NR_KEYS] = {
753 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
754 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
755 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
756 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
757 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
758 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
759 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
760 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
761 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
762 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
763 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
764 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
765 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
766 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
767 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
768 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
769 }, altgr_map[NR_KEYS] = {
770 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
771 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
772 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
773 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
774 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
775 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
776 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
777 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
778 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
779 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
780 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
781 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
782 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
783 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
784 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
785 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
786 }, ctrl_map[NR_KEYS] = {
787 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
788 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
789 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
790 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
791 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
792 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
793 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
794 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
795 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
796 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
797 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
798 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
799 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
800 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
801 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
802 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
803 };
804 
805 const u_short * const linux_keytabs[] = {
806 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
807 };
808 #endif
809 
810 static struct biosdisk_info *
811 fd2biosinfo(struct proc *p, struct file *fp)
812 {
813 	struct vnode *vp;
814 	const char *blkname;
815 	char diskname[16];
816 	int i;
817 	struct nativedisk_info *nip;
818 	struct disklist *dl = x86_alldisks;
819 
820 	if (fp->f_type != DTYPE_VNODE)
821 		return NULL;
822 	vp = (struct vnode *)fp->f_data;
823 
824 	if (vp->v_type != VBLK)
825 		return NULL;
826 
827 	blkname = devsw_blk2name(major(vp->v_rdev));
828 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
829 	    DISKUNIT(vp->v_rdev));
830 
831 	for (i = 0; i < dl->dl_nnativedisks; i++) {
832 		nip = &dl->dl_nativedisks[i];
833 		if (strcmp(diskname, nip->ni_devname))
834 			continue;
835 		if (nip->ni_nmatches != 0)
836 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
837 	}
838 
839 	return NULL;
840 }
841 
842 
843 /*
844  * We come here in a last attempt to satisfy a Linux ioctl() call
845  */
846 int
847 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
848 {
849 	/* {
850 		syscallarg(int) fd;
851 		syscallarg(u_long) com;
852 		syscallarg(void *) data;
853 	} */
854 	struct sys_ioctl_args bia;
855 	u_long com;
856 	int error, error1;
857 #if (NWSDISPLAY > 0)
858 	struct vt_mode lvt;
859 	struct kbentry kbe;
860 #endif
861 	struct linux_hd_geometry hdg;
862 	struct linux_hd_big_geometry hdg_big;
863 	struct biosdisk_info *bip;
864 	struct filedesc *fdp;
865 	struct file *fp;
866 	int fd;
867 	struct disklabel label, *labp;
868 	struct partinfo partp;
869 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
870 	u_long start, biostotal, realtotal;
871 	u_char heads, sectors;
872 	u_int cylinders;
873 	struct ioctl_pt pt;
874 	struct proc *p = l->l_proc;
875 
876 	fd = SCARG(uap, fd);
877 	SCARG(&bia, fd) = fd;
878 	SCARG(&bia, data) = SCARG(uap, data);
879 	com = SCARG(uap, com);
880 
881 	fdp = p->p_fd;
882 
883 	if ((fp = fd_getfile(fdp, fd)) == NULL)
884 		return (EBADF);
885 
886 	FILE_USE(fp);
887 
888 	switch (com) {
889 #if (NWSDISPLAY > 0)
890 	case LINUX_KDGKBMODE:
891 		com = KDGKBMODE;
892 		break;
893 	case LINUX_KDSKBMODE:
894 		com = KDSKBMODE;
895 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
896 			SCARG(&bia, data) = (void *)K_RAW;
897 		break;
898 	case LINUX_KIOCSOUND:
899 		SCARG(&bia, data) =
900 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
901 		/* fall through */
902 	case LINUX_KDMKTONE:
903 		com = KDMKTONE;
904 		break;
905 	case LINUX_KDSETMODE:
906 		com = KDSETMODE;
907 		break;
908 	case LINUX_KDGETMODE:
909 		/* KD_* values are equal to the wscons numbers */
910 		com = WSDISPLAYIO_GMODE;
911 		break;
912 	case LINUX_KDENABIO:
913 		com = KDENABIO;
914 		break;
915 	case LINUX_KDDISABIO:
916 		com = KDDISABIO;
917 		break;
918 	case LINUX_KDGETLED:
919 		com = KDGETLED;
920 		break;
921 	case LINUX_KDSETLED:
922 		com = KDSETLED;
923 		break;
924 	case LINUX_VT_OPENQRY:
925 		com = VT_OPENQRY;
926 		break;
927 	case LINUX_VT_GETMODE:
928 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt, l);
929 		if (error != 0)
930 			goto out;
931 		lvt.relsig = native_to_linux_signo[lvt.relsig];
932 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
933 		lvt.frsig = native_to_linux_signo[lvt.frsig];
934 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
935 		goto out;
936 	case LINUX_VT_SETMODE:
937 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
938 		if (error != 0)
939 			goto out;
940 		lvt.relsig = linux_to_native_signo[lvt.relsig];
941 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
942 		lvt.frsig = linux_to_native_signo[lvt.frsig];
943 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt, l);
944 		goto out;
945 	case LINUX_VT_DISALLOCATE:
946 		/* XXX should use WSDISPLAYIO_DELSCREEN */
947 		error = 0;
948 		goto out;
949 	case LINUX_VT_RELDISP:
950 		com = VT_RELDISP;
951 		break;
952 	case LINUX_VT_ACTIVATE:
953 		com = VT_ACTIVATE;
954 		break;
955 	case LINUX_VT_WAITACTIVE:
956 		com = VT_WAITACTIVE;
957 		break;
958 	case LINUX_VT_GETSTATE:
959 		com = VT_GETSTATE;
960 		break;
961 	case LINUX_KDGKBTYPE:
962 	    {
963 		static const u_int8_t kb101 = KB_101;
964 
965 		/* This is what Linux does. */
966 		error = copyout(&kb101, SCARG(uap, data), 1);
967 		goto out;
968 	    }
969 	case LINUX_KDGKBENT:
970 		/*
971 		 * The Linux KDGKBENT ioctl is different from the
972 		 * SYSV original. So we handle it in machdep code.
973 		 * XXX We should use keyboard mapping information
974 		 * from wsdisplay, but this would be expensive.
975 		 */
976 		if ((error = copyin(SCARG(uap, data), &kbe,
977 				    sizeof(struct kbentry))))
978 			goto out;
979 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
980 		    || kbe.kb_index >= NR_KEYS) {
981 			error = EINVAL;
982 			goto out;
983 		}
984 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
985 		error = copyout(&kbe, SCARG(uap, data),
986 				sizeof(struct kbentry));
987 		goto out;
988 #endif
989 	case LINUX_HDIO_GETGEO:
990 	case LINUX_HDIO_GETGEO_BIG:
991 		/*
992 		 * Try to mimic Linux behaviour: return the BIOS geometry
993 		 * if possible (extending its # of cylinders if it's beyond
994 		 * the 1023 limit), fall back to the MI geometry (i.e.
995 		 * the real geometry) if not found, by returning an
996 		 * error. See common/linux_hdio.c
997 		 */
998 		bip = fd2biosinfo(p, fp);
999 		ioctlf = fp->f_ops->fo_ioctl;
1000 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label, l);
1001 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp, l);
1002 		if (error != 0 && error1 != 0) {
1003 			error = error1;
1004 			goto out;
1005 		}
1006 		labp = error != 0 ? &label : partp.disklab;
1007 		start = error1 != 0 ? partp.part->p_offset : 0;
1008 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1009 		    && bip->bi_cyl != 0) {
1010 			heads = bip->bi_head;
1011 			sectors = bip->bi_sec;
1012 			cylinders = bip->bi_cyl;
1013 			biostotal = heads * sectors * cylinders;
1014 			realtotal = labp->d_ntracks * labp->d_nsectors *
1015 			    labp->d_ncylinders;
1016 			if (realtotal > biostotal)
1017 				cylinders = realtotal / (heads * sectors);
1018 		} else {
1019 			heads = labp->d_ntracks;
1020 			cylinders = labp->d_ncylinders;
1021 			sectors = labp->d_nsectors;
1022 		}
1023 		if (com == LINUX_HDIO_GETGEO) {
1024 			hdg.start = start;
1025 			hdg.heads = heads;
1026 			hdg.cylinders = cylinders;
1027 			hdg.sectors = sectors;
1028 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1029 			goto out;
1030 		} else {
1031 			hdg_big.start = start;
1032 			hdg_big.heads = heads;
1033 			hdg_big.cylinders = cylinders;
1034 			hdg_big.sectors = sectors;
1035 			error = copyout(&hdg_big, SCARG(uap, data),
1036 			    sizeof hdg_big);
1037 			goto out;
1038 		}
1039 
1040 	default:
1041 		/*
1042 		 * Unknown to us. If it's on a device, just pass it through
1043 		 * using PTIOCLINUX, the device itself might be able to
1044 		 * make some sense of it.
1045 		 * XXX hack: if the function returns EJUSTRETURN,
1046 		 * it has stuffed a sysctl return value in pt.data.
1047 		 */
1048 		ioctlf = fp->f_ops->fo_ioctl;
1049 		pt.com = SCARG(uap, com);
1050 		pt.data = SCARG(uap, data);
1051 		error = ioctlf(fp, PTIOCLINUX, (void *)&pt, l);
1052 		if (error == EJUSTRETURN) {
1053 			retval[0] = (register_t)pt.data;
1054 			error = 0;
1055 		}
1056 
1057 		if (error == ENOTTY) {
1058 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1059 			    com));
1060 		}
1061 		goto out;
1062 	}
1063 	SCARG(&bia, com) = com;
1064 	/* XXX NJWLWP */
1065 	error = sys_ioctl(curlwp, &bia, retval);
1066 out:
1067 	FILE_UNUSE(fp ,l);
1068 	return error;
1069 }
1070 
1071 /*
1072  * Set I/O permissions for a process. Just set the maximum level
1073  * right away (ignoring the argument), otherwise we would have
1074  * to rely on I/O permission maps, which are not implemented.
1075  */
1076 int
1077 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1078 {
1079 	/* {
1080 		syscallarg(int) level;
1081 	} */
1082 	struct trapframe *fp = l->l_md.md_regs;
1083 
1084 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1085 	    NULL, NULL, NULL, NULL) != 0)
1086 		return EPERM;
1087 	fp->tf_eflags |= PSL_IOPL;
1088 	*retval = 0;
1089 	return 0;
1090 }
1091 
1092 /*
1093  * See above. If a root process tries to set access to an I/O port,
1094  * just let it have the whole range.
1095  */
1096 int
1097 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1098 {
1099 	/* {
1100 		syscallarg(unsigned int) lo;
1101 		syscallarg(unsigned int) hi;
1102 		syscallarg(int) val;
1103 	} */
1104 	struct trapframe *fp = l->l_md.md_regs;
1105 
1106 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1107 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1108 	    NULL, NULL) != 0)
1109 		return EPERM;
1110 	if (SCARG(uap, val))
1111 		fp->tf_eflags |= PSL_IOPL;
1112 	*retval = 0;
1113 	return 0;
1114 }
1115 
1116 int
1117 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1118     void *arg)
1119 {
1120 	return 0;
1121 }
1122 
1123 const char *
1124 linux_get_uname_arch(void)
1125 {
1126 	static char uname_arch[5] = "i386";
1127 
1128 	if (uname_arch[1] == '3')
1129 		uname_arch[1] += cpu_class;
1130 	return uname_arch;
1131 }
1132 
1133 #ifdef LINUX_NPTL
/*
 * linux_get_newtls: placeholder for fetching the new TLS pointer of
 * an LWP.  Not implemented yet: the argument is ignored and NULL is
 * always returned.
 */
void *
linux_get_newtls(struct lwp *l)
{
	(void)l;

	/* XXX: Implement me */
	return NULL;
}
1142 
/*
 * linux_set_newtls: placeholder for installing a TLS pointer on an
 * LWP.  Not implemented yet: both arguments are ignored and 0 is
 * always returned.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{
	(void)l;
	(void)tls;

	/* XXX: Implement me */
	return 0;
}
1149 #endif
1150