xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: linux_machdep.c,v 1.132 2007/12/08 18:36:05 dsl Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.132 2007/12/08 18:36:05 dsl Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/syscallargs.h>
66 #include <sys/filedesc.h>
67 #include <sys/exec_elf.h>
68 #include <sys/disklabel.h>
69 #include <sys/ioctl.h>
70 #include <sys/wait.h>
71 #include <sys/kauth.h>
72 
73 #include <miscfs/specfs/specdev.h>
74 
75 #include <compat/linux/common/linux_types.h>
76 #include <compat/linux/common/linux_signal.h>
77 #include <compat/linux/common/linux_util.h>
78 #include <compat/linux/common/linux_ioctl.h>
79 #include <compat/linux/common/linux_hdio.h>
80 #include <compat/linux/common/linux_exec.h>
81 #include <compat/linux/common/linux_machdep.h>
82 #include <compat/linux/common/linux_errno.h>
83 
84 #include <compat/linux/linux_syscallargs.h>
85 
86 #include <sys/cpu.h>
87 #include <machine/cpufunc.h>
88 #include <machine/psl.h>
89 #include <machine/reg.h>
90 #include <machine/segments.h>
91 #include <machine/specialreg.h>
92 #include <machine/sysarch.h>
93 #include <machine/vm86.h>
94 #include <machine/vmparam.h>
95 
96 /*
97  * To see whether wscons is configured (for virtual console ioctl calls).
98  */
99 #if defined(_KERNEL_OPT)
100 #include "wsdisplay.h"
101 #endif
102 #if (NWSDISPLAY > 0)
103 #include <dev/wscons/wsconsio.h>
104 #include <dev/wscons/wsdisplay_usl_io.h>
105 #if defined(_KERNEL_OPT)
106 #include "opt_xserver.h"
107 #endif
108 #endif
109 
/*
 * Debug tracing.  DPRINTF takes a single, fully parenthesised argument
 * list, e.g. DPRINTF(("x = %d\n", x)).  With DEBUG_LINUX defined it
 * expands to a uprintf call on that list; otherwise it expands to nothing.
 */
#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a) uprintf a
#endif
115 
116 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
117 extern struct disklist *x86_alldisks;
118 static void linux_save_ucontext(struct lwp *, struct trapframe *,
119     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
120 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
121     const sigset_t *, struct linux_sigcontext *);
122 static int linux_restore_sigcontext(struct lwp *,
123     struct linux_sigcontext *, register_t *);
124 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
125 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
126 
127 extern char linux_sigcode[], linux_rt_sigcode[];
128 /*
129  * Deal with some i386-specific things in the Linux emulation code.
130  */
131 
132 void
133 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
134 {
135 	struct pcb *pcb = &l->l_addr->u_pcb;
136 	struct trapframe *tf;
137 
138 #if NNPX > 0
139 	/* If we were using the FPU, forget about it. */
140 	if (npxproc == l)
141 		npxdrop();
142 #endif
143 
144 #ifdef USER_LDT
145 	pmap_ldt_cleanup(l);
146 #endif
147 
148 	l->l_md.md_flags &= ~MDL_USEDFPU;
149 
150 	if (i386_use_fxsave) {
151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
152 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
153 	} else
154 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
155 
156 	tf = l->l_md.md_regs;
157 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
158 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
159 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
160 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
161 	tf->tf_edi = 0;
162 	tf->tf_esi = 0;
163 	tf->tf_ebp = 0;
164 	tf->tf_ebx = (int)l->l_proc->p_psstr;
165 	tf->tf_edx = 0;
166 	tf->tf_ecx = 0;
167 	tf->tf_eax = 0;
168 	tf->tf_eip = epp->ep_entry;
169 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
170 	tf->tf_eflags = PSL_USERSET;
171 	tf->tf_esp = stack;
172 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
173 }
174 
175 /*
176  * Send an interrupt to process.
177  *
178  * Stack is set up to allow sigcode stored
179  * in u. to call routine, followed by kcall
180  * to sigreturn routine below.  After sigreturn
181  * resets the signal mask, the stack, and the
182  * frame pointer, it returns to the user
183  * specified pc, psl.
184  */
185 
186 void
187 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
188 {
189 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
190 		linux_rt_sendsig(ksi, mask);
191 	else
192 		linux_old_sendsig(ksi, mask);
193 }
194 
195 
196 static void
197 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
198 {
199 	uc->uc_flags = 0;
200 	uc->uc_link = NULL;
201 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
202 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
203 	native_to_linux_sigset(&uc->uc_sigmask, mask);
204 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
205 }
206 
207 static void
208 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
209 {
210 	/* Save register context. */
211 #ifdef VM86
212 	if (tf->tf_eflags & PSL_VM) {
213 		sc->sc_gs = tf->tf_vm86_gs;
214 		sc->sc_fs = tf->tf_vm86_fs;
215 		sc->sc_es = tf->tf_vm86_es;
216 		sc->sc_ds = tf->tf_vm86_ds;
217 		sc->sc_eflags = get_vflags(l);
218 	} else
219 #endif
220 	{
221 		sc->sc_gs = tf->tf_gs;
222 		sc->sc_fs = tf->tf_fs;
223 		sc->sc_es = tf->tf_es;
224 		sc->sc_ds = tf->tf_ds;
225 		sc->sc_eflags = tf->tf_eflags;
226 	}
227 	sc->sc_edi = tf->tf_edi;
228 	sc->sc_esi = tf->tf_esi;
229 	sc->sc_esp = tf->tf_esp;
230 	sc->sc_ebp = tf->tf_ebp;
231 	sc->sc_ebx = tf->tf_ebx;
232 	sc->sc_edx = tf->tf_edx;
233 	sc->sc_ecx = tf->tf_ecx;
234 	sc->sc_eax = tf->tf_eax;
235 	sc->sc_eip = tf->tf_eip;
236 	sc->sc_cs = tf->tf_cs;
237 	sc->sc_esp_at_signal = tf->tf_esp;
238 	sc->sc_ss = tf->tf_ss;
239 	sc->sc_err = tf->tf_err;
240 	sc->sc_trapno = tf->tf_trapno;
241 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
242 	sc->sc_387 = NULL;
243 
244 	/* Save signal stack. */
245 	/* Linux doesn't save the onstack flag in sigframe */
246 
247 	/* Save signal mask. */
248 	native_to_linux_old_sigset(&sc->sc_mask, mask);
249 }
250 
251 static void
252 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
253 {
254 	struct lwp *l = curlwp;
255 	struct proc *p = l->l_proc;
256 	struct trapframe *tf;
257 	struct linux_rt_sigframe *fp, frame;
258 	int onstack, error;
259 	linux_siginfo_t *lsi;
260 	int sig = ksi->ksi_signo;
261 	sig_t catcher = SIGACTION(p, sig).sa_handler;
262 	struct sigaltstack *sas = &l->l_sigstk;
263 
264 	tf = l->l_md.md_regs;
265 	/* Do we need to jump onto the signal stack? */
266 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
267 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
268 
269 
270 	/* Allocate space for the signal handler context. */
271 	if (onstack)
272 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
273 		    sas->ss_size);
274 	else
275 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
276 	fp--;
277 
278 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
279 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
280 
281 	/* Build stack frame for signal trampoline. */
282 	frame.sf_handler = catcher;
283 	frame.sf_sig = native_to_linux_signo[sig];
284 	frame.sf_sip = &fp->sf_si;
285 	frame.sf_ucp = &fp->sf_uc;
286 
287 	/*
288 	 * XXX: the following code assumes that the constants for
289 	 * siginfo are the same between linux and NetBSD.
290 	 */
291 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
292 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
293 	lsi->lsi_code = ksi->ksi_code;
294 	switch (lsi->lsi_signo = frame.sf_sig) {
295 	case LINUX_SIGILL:
296 	case LINUX_SIGFPE:
297 	case LINUX_SIGSEGV:
298 	case LINUX_SIGBUS:
299 	case LINUX_SIGTRAP:
300 		lsi->lsi_addr = ksi->ksi_addr;
301 		break;
302 	case LINUX_SIGCHLD:
303 		lsi->lsi_uid = ksi->ksi_uid;
304 		lsi->lsi_pid = ksi->ksi_pid;
305 		lsi->lsi_utime = ksi->ksi_utime;
306 		lsi->lsi_stime = ksi->ksi_stime;
307 
308 		/* We use the same codes */
309 		lsi->lsi_code = ksi->ksi_code;
310 		/* XXX is that right? */
311 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
312 		break;
313 	case LINUX_SIGIO:
314 		lsi->lsi_band = ksi->ksi_band;
315 		lsi->lsi_fd = ksi->ksi_fd;
316 		break;
317 	default:
318 		lsi->lsi_uid = ksi->ksi_uid;
319 		lsi->lsi_pid = ksi->ksi_pid;
320 		if (lsi->lsi_signo == LINUX_SIGALRM ||
321 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
322 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
323 		break;
324 	}
325 
326 	/* Save register context. */
327 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
328 	sendsig_reset(l, sig);
329 
330 	mutex_exit(&p->p_smutex);
331 	error = copyout(&frame, fp, sizeof(frame));
332 	mutex_enter(&p->p_smutex);
333 
334 	if (error != 0) {
335 		/*
336 		 * Process has trashed its stack; give it an illegal
337 		 * instruction to halt it in its tracks.
338 		 */
339 		sigexit(l, SIGILL);
340 		/* NOTREACHED */
341 	}
342 
343 	/*
344 	 * Build context to run handler in.
345 	 */
346 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
347 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
348 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
349 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
350 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
351 	    (linux_rt_sigcode - linux_sigcode);
352 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
353 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
354 	tf->tf_esp = (int)fp;
355 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
356 
357 	/* Remember that we're now on the signal stack. */
358 	if (onstack)
359 		sas->ss_flags |= SS_ONSTACK;
360 }
361 
362 static void
363 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
364 {
365 	struct lwp *l = curlwp;
366 	struct proc *p = l->l_proc;
367 	struct trapframe *tf;
368 	struct linux_sigframe *fp, frame;
369 	int onstack, error;
370 	int sig = ksi->ksi_signo;
371 	sig_t catcher = SIGACTION(p, sig).sa_handler;
372 	struct sigaltstack *sas = &l->l_sigstk;
373 
374 	tf = l->l_md.md_regs;
375 
376 	/* Do we need to jump onto the signal stack? */
377 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
378 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
379 
380 	/* Allocate space for the signal handler context. */
381 	if (onstack)
382 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
383 		    sas->ss_size);
384 	else
385 		fp = (struct linux_sigframe *)tf->tf_esp;
386 	fp--;
387 
388 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
389 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
390 
391 	/* Build stack frame for signal trampoline. */
392 	frame.sf_handler = catcher;
393 	frame.sf_sig = native_to_linux_signo[sig];
394 
395 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
396 	sendsig_reset(l, sig);
397 
398 	mutex_exit(&p->p_smutex);
399 	error = copyout(&frame, fp, sizeof(frame));
400 	mutex_enter(&p->p_smutex);
401 
402 	if (error != 0) {
403 		/*
404 		 * Process has trashed its stack; give it an illegal
405 		 * instruction to halt it in its tracks.
406 		 */
407 		sigexit(l, SIGILL);
408 		/* NOTREACHED */
409 	}
410 
411 	/*
412 	 * Build context to run handler in.
413 	 */
414 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
415 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
416 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
417 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
418 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
419 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
420 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
421 	tf->tf_esp = (int)fp;
422 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
423 
424 	/* Remember that we're now on the signal stack. */
425 	if (onstack)
426 		sas->ss_flags |= SS_ONSTACK;
427 }
428 
429 /*
430  * System call to cleanup state after a signal
431  * has been taken.  Reset signal mask and
432  * stack state from context left by sendsig (above).
433  * Return to previous pc and psl as specified by
434  * context left by sendsig. Check carefully to
435  * make sure that the user has not modified the
436  * psl to gain improper privileges or to cause
437  * a machine fault.
438  */
439 int
440 linux_sys_rt_sigreturn(struct lwp *l, void *v, register_t *retval)
441 {
442 	struct linux_sys_rt_sigreturn_args /* {
443 		syscallarg(struct linux_ucontext *) ucp;
444 	} */ *uap = v;
445 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
446 	int error;
447 
448 	/*
449 	 * The trampoline code hands us the context.
450 	 * It is unsafe to keep track of it ourselves, in the event that a
451 	 * program jumps out of a signal handler.
452 	 */
453 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
454 		return error;
455 
456 	/* XXX XAX we can do better here by using more of the ucontext */
457 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
458 }
459 
460 int
461 linux_sys_sigreturn(struct lwp *l, void *v, register_t *retval)
462 {
463 	struct linux_sys_sigreturn_args /* {
464 		syscallarg(struct linux_sigcontext *) scp;
465 	} */ *uap = v;
466 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
467 	int error;
468 
469 	/*
470 	 * The trampoline code hands us the context.
471 	 * It is unsafe to keep track of it ourselves, in the event that a
472 	 * program jumps out of a signal handler.
473 	 */
474 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
475 		return error;
476 	return linux_restore_sigcontext(l, &context, retval);
477 }
478 
479 static int
480 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
481     register_t *retval)
482 {
483 	struct proc *p = l->l_proc;
484 	struct sigaltstack *sas = &l->l_sigstk;
485 	struct trapframe *tf;
486 	sigset_t mask;
487 	ssize_t ss_gap;
488 	/* Restore register context. */
489 	tf = l->l_md.md_regs;
490 
491 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
492 #ifdef VM86
493 	if (scp->sc_eflags & PSL_VM) {
494 		void syscall_vm86(struct trapframe *);
495 
496 		tf->tf_vm86_gs = scp->sc_gs;
497 		tf->tf_vm86_fs = scp->sc_fs;
498 		tf->tf_vm86_es = scp->sc_es;
499 		tf->tf_vm86_ds = scp->sc_ds;
500 		set_vflags(l, scp->sc_eflags);
501 		p->p_md.md_syscall = syscall_vm86;
502 	} else
503 #endif
504 	{
505 		/*
506 		 * Check for security violations.  If we're returning to
507 		 * protected mode, the CPU will validate the segment registers
508 		 * automatically and generate a trap on violations.  We handle
509 		 * the trap, rather than doing all of the checking here.
510 		 */
511 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
512 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
513 			return EINVAL;
514 
515 		tf->tf_gs = scp->sc_gs;
516 		tf->tf_fs = scp->sc_fs;
517 		tf->tf_es = scp->sc_es;
518 		tf->tf_ds = scp->sc_ds;
519 #ifdef VM86
520 		if (tf->tf_eflags & PSL_VM)
521 			(*p->p_emul->e_syscall_intern)(p);
522 #endif
523 		tf->tf_eflags = scp->sc_eflags;
524 	}
525 	tf->tf_edi = scp->sc_edi;
526 	tf->tf_esi = scp->sc_esi;
527 	tf->tf_ebp = scp->sc_ebp;
528 	tf->tf_ebx = scp->sc_ebx;
529 	tf->tf_edx = scp->sc_edx;
530 	tf->tf_ecx = scp->sc_ecx;
531 	tf->tf_eax = scp->sc_eax;
532 	tf->tf_eip = scp->sc_eip;
533 	tf->tf_cs = scp->sc_cs;
534 	tf->tf_esp = scp->sc_esp_at_signal;
535 	tf->tf_ss = scp->sc_ss;
536 
537 	/* Restore signal stack. */
538 	/*
539 	 * Linux really does it this way; it doesn't have space in sigframe
540 	 * to save the onstack flag.
541 	 */
542 	mutex_enter(&p->p_smutex);
543 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
544 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
545 		sas->ss_flags |= SS_ONSTACK;
546 	else
547 		sas->ss_flags &= ~SS_ONSTACK;
548 
549 	/* Restore signal mask. */
550 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
551 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
552 	mutex_exit(&p->p_smutex);
553 
554 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
555 	return EJUSTRETURN;
556 }
557 
558 #ifdef USER_LDT
559 
560 static int
561 linux_read_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
562     register_t *retval)
563 {
564 	struct x86_get_ldt_args gl;
565 	int error;
566 	int num_ldt;
567 	union descriptor *ldt_buf;
568 
569 	/*
570 	 * I've checked the linux code - this function is asymetric with
571 	 * linux_write_ldt, and returns raw ldt entries.
572 	 * NB, the code I saw zerod the spare parts of the user buffer.
573 	 */
574 
575 	DPRINTF(("linux_read_ldt!"));
576 
577 	num_ldt = x86_get_ldt_len(l);
578 	if (num_ldt <= 0)
579 		return EINVAL;
580 
581 	gl.start = 0;
582 	gl.desc = NULL;
583 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
584 
585 	if (gl.num > num_ldt)
586 		gl.num = num_ldt;
587 
588 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
589 
590 	error = x86_get_ldt1(l, &gl, ldt_buf);
591 	/* NB gl.num might have changed */
592 	if (error == 0) {
593 		*retval = gl.num * sizeof *ldt;
594 		error = copyout(ldt_buf, SCARG(uap, ptr),
595 		    gl.num * sizeof *ldt_buf);
596 	}
597 	free(ldt, M_TEMP);
598 
599 	return error;
600 }
601 
602 struct linux_ldt_info {
603 	u_int entry_number;
604 	u_long base_addr;
605 	u_int limit;
606 	u_int seg_32bit:1;
607 	u_int contents:2;
608 	u_int read_exec_only:1;
609 	u_int limit_in_pages:1;
610 	u_int seg_not_present:1;
611 	u_int useable:1;
612 };
613 
614 static int
615 linux_write_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
616     int oldmode)
617 {
618 	struct linux_ldt_info ldt_info;
619 	union descriptor d;
620 	struct x86_set_ldt_args sl;
621 	int error;
622 
623 	DPRINTF(("linux_write_ldt %d\n", oldmode));
624 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
625 		return (EINVAL);
626 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
627 		return error;
628 	if (ldt_info.entry_number >= 8192)
629 		return (EINVAL);
630 	if (ldt_info.contents == 3) {
631 		if (oldmode)
632 			return (EINVAL);
633 		if (ldt_info.seg_not_present)
634 			return (EINVAL);
635 	}
636 
637 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
638 	    (oldmode || (ldt_info.contents == 0 &&
639 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
640 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
641 	    ldt_info.useable == 0))) {
642 		/* this means you should zero the ldt */
643 		(void)memset(&d, 0, sizeof(d));
644 	} else {
645 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
646 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
647 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
648 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
649 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
650 		    (!ldt_info.read_exec_only << 1);
651 		d.sd.sd_dpl = SEL_UPL;
652 		d.sd.sd_p = !ldt_info.seg_not_present;
653 		d.sd.sd_def32 = ldt_info.seg_32bit;
654 		d.sd.sd_gran = ldt_info.limit_in_pages;
655 		if (!oldmode)
656 			d.sd.sd_xx = ldt_info.useable;
657 		else
658 			d.sd.sd_xx = 0;
659 	}
660 	sl.start = ldt_info.entry_number;
661 	sl.desc = NULL;;
662 	sl.num = 1;
663 
664 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
665 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
666 
667 	return x86_set_ldt1(l, &sl, &d);
668 }
669 
670 #endif /* USER_LDT */
671 
672 int
673 linux_sys_modify_ldt(struct lwp *l, void *v,
674     register_t *retval)
675 {
676 	struct linux_sys_modify_ldt_args /* {
677 		syscallarg(int) func;
678 		syscallarg(void *) ptr;
679 		syscallarg(size_t) bytecount;
680 	} */ *uap = v;
681 
682 	switch (SCARG(uap, func)) {
683 #ifdef USER_LDT
684 	case 0:
685 		return linux_read_ldt(l, uap, retval);
686 	case 1:
687 		return linux_write_ldt(l, uap, 1);
688 	case 2:
689 #ifdef notyet
690 		return (linux_read_default_ldt(l, uap, retval);
691 #else
692 		return (ENOSYS);
693 #endif
694 	case 0x11:
695 		return linux_write_ldt(l, uap, 0);
696 #endif /* USER_LDT */
697 
698 	default:
699 		return (ENOSYS);
700 	}
701 }
702 
703 /*
704  * XXX Pathetic hack to make svgalib work. This will fake the major
705  * device number of an opened VT so that svgalib likes it. grmbl.
706  * Should probably do it 'wrong the right way' and use a mapping
707  * array for all major device numbers, and map linux_mknod too.
708  */
709 dev_t
710 linux_fakedev(dev_t dev, int raw)
711 {
712 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
713 	const struct cdevsw *cd = cdevsw_lookup(dev);
714 
715 	if (raw) {
716 #if (NWSDISPLAY > 0)
717 		extern const struct cdevsw wsdisplay_cdevsw;
718 		if (cd == &wsdisplay_cdevsw)
719 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
720 #endif
721 	}
722 
723 	if (cd == &ptc_cdevsw)
724 		return makedev(LINUX_PTC_MAJOR, minor(dev));
725 	if (cd == &pts_cdevsw)
726 		return makedev(LINUX_PTS_MAJOR, minor(dev));
727 
728 	return dev;
729 }
730 
731 #if (NWSDISPLAY > 0)
732 /*
733  * That's not complete, but enough to get an X server running.
734  */
735 #define NR_KEYS 128
736 static const u_short plain_map[NR_KEYS] = {
737 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
738 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
739 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
740 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
741 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
742 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
743 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
744 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
745 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
746 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
747 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
748 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
749 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
750 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
751 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
752 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
753 }, shift_map[NR_KEYS] = {
754 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
755 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
756 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
757 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
758 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
759 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
760 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
761 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
762 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
763 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
764 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
765 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
766 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
767 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
768 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
769 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
770 }, altgr_map[NR_KEYS] = {
771 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
772 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
773 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
774 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
775 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
776 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
777 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
778 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
779 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
780 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
781 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
782 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
783 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
784 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
785 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
786 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
787 }, ctrl_map[NR_KEYS] = {
788 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
789 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
790 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
791 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
792 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
793 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
794 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
795 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
796 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
797 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
798 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
799 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
800 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
801 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
802 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
803 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
804 };
805 
806 const u_short * const linux_keytabs[] = {
807 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
808 };
809 #endif
810 
811 static struct biosdisk_info *
812 fd2biosinfo(struct proc *p, struct file *fp)
813 {
814 	struct vnode *vp;
815 	const char *blkname;
816 	char diskname[16];
817 	int i;
818 	struct nativedisk_info *nip;
819 	struct disklist *dl = x86_alldisks;
820 
821 	if (fp->f_type != DTYPE_VNODE)
822 		return NULL;
823 	vp = (struct vnode *)fp->f_data;
824 
825 	if (vp->v_type != VBLK)
826 		return NULL;
827 
828 	blkname = devsw_blk2name(major(vp->v_rdev));
829 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
830 	    DISKUNIT(vp->v_rdev));
831 
832 	for (i = 0; i < dl->dl_nnativedisks; i++) {
833 		nip = &dl->dl_nativedisks[i];
834 		if (strcmp(diskname, nip->ni_devname))
835 			continue;
836 		if (nip->ni_nmatches != 0)
837 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
838 	}
839 
840 	return NULL;
841 }
842 
843 
844 /*
845  * We come here in a last attempt to satisfy a Linux ioctl() call
846  */
847 int
848 linux_machdepioctl(struct lwp *l, void *v, register_t *retval)
849 {
850 	struct linux_sys_ioctl_args /* {
851 		syscallarg(int) fd;
852 		syscallarg(u_long) com;
853 		syscallarg(void *) data;
854 	} */ *uap = v;
855 	struct sys_ioctl_args bia;
856 	u_long com;
857 	int error, error1;
858 #if (NWSDISPLAY > 0)
859 	struct vt_mode lvt;
860 	struct kbentry kbe;
861 #endif
862 	struct linux_hd_geometry hdg;
863 	struct linux_hd_big_geometry hdg_big;
864 	struct biosdisk_info *bip;
865 	struct filedesc *fdp;
866 	struct file *fp;
867 	int fd;
868 	struct disklabel label, *labp;
869 	struct partinfo partp;
870 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
871 	u_long start, biostotal, realtotal;
872 	u_char heads, sectors;
873 	u_int cylinders;
874 	struct ioctl_pt pt;
875 	struct proc *p = l->l_proc;
876 
877 	fd = SCARG(uap, fd);
878 	SCARG(&bia, fd) = fd;
879 	SCARG(&bia, data) = SCARG(uap, data);
880 	com = SCARG(uap, com);
881 
882 	fdp = p->p_fd;
883 
884 	if ((fp = fd_getfile(fdp, fd)) == NULL)
885 		return (EBADF);
886 
887 	FILE_USE(fp);
888 
889 	switch (com) {
890 #if (NWSDISPLAY > 0)
891 	case LINUX_KDGKBMODE:
892 		com = KDGKBMODE;
893 		break;
894 	case LINUX_KDSKBMODE:
895 		com = KDSKBMODE;
896 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
897 			SCARG(&bia, data) = (void *)K_RAW;
898 		break;
899 	case LINUX_KIOCSOUND:
900 		SCARG(&bia, data) =
901 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
902 		/* fall through */
903 	case LINUX_KDMKTONE:
904 		com = KDMKTONE;
905 		break;
906 	case LINUX_KDSETMODE:
907 		com = KDSETMODE;
908 		break;
909 	case LINUX_KDGETMODE:
910 		/* KD_* values are equal to the wscons numbers */
911 		com = WSDISPLAYIO_GMODE;
912 		break;
913 	case LINUX_KDENABIO:
914 		com = KDENABIO;
915 		break;
916 	case LINUX_KDDISABIO:
917 		com = KDDISABIO;
918 		break;
919 	case LINUX_KDGETLED:
920 		com = KDGETLED;
921 		break;
922 	case LINUX_KDSETLED:
923 		com = KDSETLED;
924 		break;
925 	case LINUX_VT_OPENQRY:
926 		com = VT_OPENQRY;
927 		break;
928 	case LINUX_VT_GETMODE:
929 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt, l);
930 		if (error != 0)
931 			goto out;
932 		lvt.relsig = native_to_linux_signo[lvt.relsig];
933 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
934 		lvt.frsig = native_to_linux_signo[lvt.frsig];
935 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
936 		goto out;
937 	case LINUX_VT_SETMODE:
938 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
939 		if (error != 0)
940 			goto out;
941 		lvt.relsig = linux_to_native_signo[lvt.relsig];
942 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
943 		lvt.frsig = linux_to_native_signo[lvt.frsig];
944 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt, l);
945 		goto out;
946 	case LINUX_VT_DISALLOCATE:
947 		/* XXX should use WSDISPLAYIO_DELSCREEN */
948 		error = 0;
949 		goto out;
950 	case LINUX_VT_RELDISP:
951 		com = VT_RELDISP;
952 		break;
953 	case LINUX_VT_ACTIVATE:
954 		com = VT_ACTIVATE;
955 		break;
956 	case LINUX_VT_WAITACTIVE:
957 		com = VT_WAITACTIVE;
958 		break;
959 	case LINUX_VT_GETSTATE:
960 		com = VT_GETSTATE;
961 		break;
962 	case LINUX_KDGKBTYPE:
963 	    {
964 		static const u_int8_t kb101 = KB_101;
965 
966 		/* This is what Linux does. */
967 		error = copyout(&kb101, SCARG(uap, data), 1);
968 		goto out;
969 	    }
970 	case LINUX_KDGKBENT:
971 		/*
972 		 * The Linux KDGKBENT ioctl is different from the
973 		 * SYSV original. So we handle it in machdep code.
974 		 * XXX We should use keyboard mapping information
975 		 * from wsdisplay, but this would be expensive.
976 		 */
977 		if ((error = copyin(SCARG(uap, data), &kbe,
978 				    sizeof(struct kbentry))))
979 			goto out;
980 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
981 		    || kbe.kb_index >= NR_KEYS) {
982 			error = EINVAL;
983 			goto out;
984 		}
985 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
986 		error = copyout(&kbe, SCARG(uap, data),
987 				sizeof(struct kbentry));
988 		goto out;
989 #endif
990 	case LINUX_HDIO_GETGEO:
991 	case LINUX_HDIO_GETGEO_BIG:
992 		/*
993 		 * Try to mimic Linux behaviour: return the BIOS geometry
994 		 * if possible (extending its # of cylinders if it's beyond
995 		 * the 1023 limit), fall back to the MI geometry (i.e.
996 		 * the real geometry) if not found, by returning an
997 		 * error. See common/linux_hdio.c
998 		 */
999 		bip = fd2biosinfo(p, fp);
1000 		ioctlf = fp->f_ops->fo_ioctl;
1001 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label, l);
1002 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp, l);
1003 		if (error != 0 && error1 != 0) {
1004 			error = error1;
1005 			goto out;
1006 		}
1007 		labp = error != 0 ? &label : partp.disklab;
1008 		start = error1 != 0 ? partp.part->p_offset : 0;
1009 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1010 		    && bip->bi_cyl != 0) {
1011 			heads = bip->bi_head;
1012 			sectors = bip->bi_sec;
1013 			cylinders = bip->bi_cyl;
1014 			biostotal = heads * sectors * cylinders;
1015 			realtotal = labp->d_ntracks * labp->d_nsectors *
1016 			    labp->d_ncylinders;
1017 			if (realtotal > biostotal)
1018 				cylinders = realtotal / (heads * sectors);
1019 		} else {
1020 			heads = labp->d_ntracks;
1021 			cylinders = labp->d_ncylinders;
1022 			sectors = labp->d_nsectors;
1023 		}
1024 		if (com == LINUX_HDIO_GETGEO) {
1025 			hdg.start = start;
1026 			hdg.heads = heads;
1027 			hdg.cylinders = cylinders;
1028 			hdg.sectors = sectors;
1029 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1030 			goto out;
1031 		} else {
1032 			hdg_big.start = start;
1033 			hdg_big.heads = heads;
1034 			hdg_big.cylinders = cylinders;
1035 			hdg_big.sectors = sectors;
1036 			error = copyout(&hdg_big, SCARG(uap, data),
1037 			    sizeof hdg_big);
1038 			goto out;
1039 		}
1040 
1041 	default:
1042 		/*
1043 		 * Unknown to us. If it's on a device, just pass it through
1044 		 * using PTIOCLINUX, the device itself might be able to
1045 		 * make some sense of it.
1046 		 * XXX hack: if the function returns EJUSTRETURN,
1047 		 * it has stuffed a sysctl return value in pt.data.
1048 		 */
1049 		ioctlf = fp->f_ops->fo_ioctl;
1050 		pt.com = SCARG(uap, com);
1051 		pt.data = SCARG(uap, data);
1052 		error = ioctlf(fp, PTIOCLINUX, (void *)&pt, l);
1053 		if (error == EJUSTRETURN) {
1054 			retval[0] = (register_t)pt.data;
1055 			error = 0;
1056 		}
1057 
1058 		if (error == ENOTTY) {
1059 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1060 			    com));
1061 		}
1062 		goto out;
1063 	}
1064 	SCARG(&bia, com) = com;
1065 	/* XXX NJWLWP */
1066 	error = sys_ioctl(curlwp, &bia, retval);
1067 out:
1068 	FILE_UNUSE(fp ,l);
1069 	return error;
1070 }
1071 
1072 /*
1073  * Set I/O permissions for a process. Just set the maximum level
1074  * right away (ignoring the argument), otherwise we would have
1075  * to rely on I/O permission maps, which are not implemented.
1076  */
1077 int
1078 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
1079 {
1080 #if 0
1081 	struct linux_sys_iopl_args /* {
1082 		syscallarg(int) level;
1083 	} */ *uap = v;
1084 #endif
1085 	struct trapframe *fp = l->l_md.md_regs;
1086 
1087 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1088 	    NULL, NULL, NULL, NULL) != 0)
1089 		return EPERM;
1090 	fp->tf_eflags |= PSL_IOPL;
1091 	*retval = 0;
1092 	return 0;
1093 }
1094 
1095 /*
1096  * See above. If a root process tries to set access to an I/O port,
1097  * just let it have the whole range.
1098  */
1099 int
1100 linux_sys_ioperm(struct lwp *l, void *v, register_t *retval)
1101 {
1102 	struct linux_sys_ioperm_args /* {
1103 		syscallarg(unsigned int) lo;
1104 		syscallarg(unsigned int) hi;
1105 		syscallarg(int) val;
1106 	} */ *uap = v;
1107 	struct trapframe *fp = l->l_md.md_regs;
1108 
1109 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1110 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1111 	    NULL, NULL) != 0)
1112 		return EPERM;
1113 	if (SCARG(uap, val))
1114 		fp->tf_eflags |= PSL_IOPL;
1115 	*retval = 0;
1116 	return 0;
1117 }
1118 
1119 int
1120 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1121     void *arg)
1122 {
1123 	return 0;
1124 }
1125 
1126 const char *
1127 linux_get_uname_arch(void)
1128 {
1129 	static char uname_arch[5] = "i386";
1130 
1131 	if (uname_arch[1] == '3')
1132 		uname_arch[1] += cpu_class;
1133 	return uname_arch;
1134 }
1135 
1136 #ifdef LINUX_NPTL
/*
 * linux_get_newtls: placeholder for fetching the new TLS pointer of
 * an LWP.  Not implemented yet: the LWP is not examined and NULL is
 * always returned.
 */
void *
linux_get_newtls(struct lwp *l)
{
	/* XXX: Implement me */
	(void)l;	/* unused until this is implemented */

	return NULL;
}
1145 
/*
 * linux_set_newtls: placeholder for installing a new TLS pointer on
 * an LWP.  Not implemented yet: both arguments are ignored and 0 is
 * always returned.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{
	/* XXX: Implement me */
	(void)l;
	(void)tls;

	return 0;
}
1152 #endif
1153