xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 0df165c04d0a9ca1adde9ed2b890344c937954a6)
1 /*	$NetBSD: linux_machdep.c,v 1.130 2007/10/19 12:16:38 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.130 2007/10/19 12:16:38 ad Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/syscallargs.h>
66 #include <sys/filedesc.h>
67 #include <sys/exec_elf.h>
68 #include <sys/disklabel.h>
69 #include <sys/ioctl.h>
70 #include <sys/wait.h>
71 #include <sys/kauth.h>
72 
73 #include <miscfs/specfs/specdev.h>
74 
75 #include <compat/linux/common/linux_types.h>
76 #include <compat/linux/common/linux_signal.h>
77 #include <compat/linux/common/linux_util.h>
78 #include <compat/linux/common/linux_ioctl.h>
79 #include <compat/linux/common/linux_hdio.h>
80 #include <compat/linux/common/linux_exec.h>
81 #include <compat/linux/common/linux_machdep.h>
82 #include <compat/linux/common/linux_errno.h>
83 
84 #include <compat/linux/linux_syscallargs.h>
85 
86 #include <sys/cpu.h>
87 #include <machine/cpufunc.h>
88 #include <machine/psl.h>
89 #include <machine/reg.h>
90 #include <machine/segments.h>
91 #include <machine/specialreg.h>
92 #include <machine/sysarch.h>
93 #include <machine/vm86.h>
94 #include <machine/vmparam.h>
95 
96 /*
97  * To see whether wscons is configured (for virtual console ioctl calls).
98  */
99 #if defined(_KERNEL_OPT)
100 #include "wsdisplay.h"
101 #endif
102 #if (NWSDISPLAY > 0)
103 #include <dev/wscons/wsconsio.h>
104 #include <dev/wscons/wsdisplay_usl_io.h>
105 #if defined(_KERNEL_OPT)
106 #include "opt_xserver.h"
107 #endif
108 #endif
109 
110 #ifdef DEBUG_LINUX
111 #define DPRINTF(a) uprintf a
112 #else
113 #define DPRINTF(a)
114 #endif
115 
116 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
117 extern struct disklist *x86_alldisks;
118 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
119     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
120 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
121     const sigset_t *, struct linux_sigcontext *));
122 static int linux_restore_sigcontext __P((struct lwp *,
123     struct linux_sigcontext *, register_t *));
124 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
125 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
126 
127 extern char linux_sigcode[], linux_rt_sigcode[];
128 /*
129  * Deal with some i386-specific things in the Linux emulation code.
130  */
131 
132 void
133 linux_setregs(l, epp, stack)
134 	struct lwp *l;
135 	struct exec_package *epp;
136 	u_long stack;
137 {
138 	struct pcb *pcb = &l->l_addr->u_pcb;
139 	struct trapframe *tf;
140 
141 #if NNPX > 0
142 	/* If we were using the FPU, forget about it. */
143 	if (npxproc == l)
144 		npxdrop();
145 #endif
146 
147 #ifdef USER_LDT
148 	pmap_ldt_cleanup(l);
149 #endif
150 
151 	l->l_md.md_flags &= ~MDL_USEDFPU;
152 
153 	if (i386_use_fxsave) {
154 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
155 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
156 	} else
157 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
158 
159 	tf = l->l_md.md_regs;
160 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
161 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
162 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
163 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
164 	tf->tf_edi = 0;
165 	tf->tf_esi = 0;
166 	tf->tf_ebp = 0;
167 	tf->tf_ebx = (int)l->l_proc->p_psstr;
168 	tf->tf_edx = 0;
169 	tf->tf_ecx = 0;
170 	tf->tf_eax = 0;
171 	tf->tf_eip = epp->ep_entry;
172 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
173 	tf->tf_eflags = PSL_USERSET;
174 	tf->tf_esp = stack;
175 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
176 }
177 
178 /*
179  * Send an interrupt to process.
180  *
181  * Stack is set up to allow sigcode stored
182  * in u. to call routine, followed by kcall
183  * to sigreturn routine below.  After sigreturn
184  * resets the signal mask, the stack, and the
185  * frame pointer, it returns to the user
186  * specified pc, psl.
187  */
188 
189 void
190 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
191 {
192 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
193 		linux_rt_sendsig(ksi, mask);
194 	else
195 		linux_old_sendsig(ksi, mask);
196 }
197 
198 
199 static void
200 linux_save_ucontext(l, tf, mask, sas, uc)
201 	struct lwp *l;
202 	struct trapframe *tf;
203 	const sigset_t *mask;
204 	struct sigaltstack *sas;
205 	struct linux_ucontext *uc;
206 {
207 	uc->uc_flags = 0;
208 	uc->uc_link = NULL;
209 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
210 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
211 	native_to_linux_sigset(&uc->uc_sigmask, mask);
212 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
213 }
214 
215 static void
216 linux_save_sigcontext(l, tf, mask, sc)
217 	struct lwp *l;
218 	struct trapframe *tf;
219 	const sigset_t *mask;
220 	struct linux_sigcontext *sc;
221 {
222 	/* Save register context. */
223 #ifdef VM86
224 	if (tf->tf_eflags & PSL_VM) {
225 		sc->sc_gs = tf->tf_vm86_gs;
226 		sc->sc_fs = tf->tf_vm86_fs;
227 		sc->sc_es = tf->tf_vm86_es;
228 		sc->sc_ds = tf->tf_vm86_ds;
229 		sc->sc_eflags = get_vflags(l);
230 	} else
231 #endif
232 	{
233 		sc->sc_gs = tf->tf_gs;
234 		sc->sc_fs = tf->tf_fs;
235 		sc->sc_es = tf->tf_es;
236 		sc->sc_ds = tf->tf_ds;
237 		sc->sc_eflags = tf->tf_eflags;
238 	}
239 	sc->sc_edi = tf->tf_edi;
240 	sc->sc_esi = tf->tf_esi;
241 	sc->sc_esp = tf->tf_esp;
242 	sc->sc_ebp = tf->tf_ebp;
243 	sc->sc_ebx = tf->tf_ebx;
244 	sc->sc_edx = tf->tf_edx;
245 	sc->sc_ecx = tf->tf_ecx;
246 	sc->sc_eax = tf->tf_eax;
247 	sc->sc_eip = tf->tf_eip;
248 	sc->sc_cs = tf->tf_cs;
249 	sc->sc_esp_at_signal = tf->tf_esp;
250 	sc->sc_ss = tf->tf_ss;
251 	sc->sc_err = tf->tf_err;
252 	sc->sc_trapno = tf->tf_trapno;
253 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
254 	sc->sc_387 = NULL;
255 
256 	/* Save signal stack. */
257 	/* Linux doesn't save the onstack flag in sigframe */
258 
259 	/* Save signal mask. */
260 	native_to_linux_old_sigset(&sc->sc_mask, mask);
261 }
262 
263 static void
264 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
265 {
266 	struct lwp *l = curlwp;
267 	struct proc *p = l->l_proc;
268 	struct trapframe *tf;
269 	struct linux_rt_sigframe *fp, frame;
270 	int onstack, error;
271 	linux_siginfo_t *lsi;
272 	int sig = ksi->ksi_signo;
273 	sig_t catcher = SIGACTION(p, sig).sa_handler;
274 	struct sigaltstack *sas = &l->l_sigstk;
275 
276 	tf = l->l_md.md_regs;
277 	/* Do we need to jump onto the signal stack? */
278 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
279 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
280 
281 
282 	/* Allocate space for the signal handler context. */
283 	if (onstack)
284 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
285 		    sas->ss_size);
286 	else
287 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
288 	fp--;
289 
290 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
291 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
292 
293 	/* Build stack frame for signal trampoline. */
294 	frame.sf_handler = catcher;
295 	frame.sf_sig = native_to_linux_signo[sig];
296 	frame.sf_sip = &fp->sf_si;
297 	frame.sf_ucp = &fp->sf_uc;
298 
299 	/*
300 	 * XXX: the following code assumes that the constants for
301 	 * siginfo are the same between linux and NetBSD.
302 	 */
303 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
304 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
305 	lsi->lsi_code = ksi->ksi_code;
306 	switch (lsi->lsi_signo = frame.sf_sig) {
307 	case LINUX_SIGILL:
308 	case LINUX_SIGFPE:
309 	case LINUX_SIGSEGV:
310 	case LINUX_SIGBUS:
311 	case LINUX_SIGTRAP:
312 		lsi->lsi_addr = ksi->ksi_addr;
313 		break;
314 	case LINUX_SIGCHLD:
315 		lsi->lsi_uid = ksi->ksi_uid;
316 		lsi->lsi_pid = ksi->ksi_pid;
317 		lsi->lsi_utime = ksi->ksi_utime;
318 		lsi->lsi_stime = ksi->ksi_stime;
319 
320 		/* We use the same codes */
321 		lsi->lsi_code = ksi->ksi_code;
322 		/* XXX is that right? */
323 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
324 		break;
325 	case LINUX_SIGIO:
326 		lsi->lsi_band = ksi->ksi_band;
327 		lsi->lsi_fd = ksi->ksi_fd;
328 		break;
329 	default:
330 		lsi->lsi_uid = ksi->ksi_uid;
331 		lsi->lsi_pid = ksi->ksi_pid;
332 		if (lsi->lsi_signo == LINUX_SIGALRM ||
333 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
334 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
335 		break;
336 	}
337 
338 	/* Save register context. */
339 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
340 	sendsig_reset(l, sig);
341 
342 	mutex_exit(&p->p_smutex);
343 	error = copyout(&frame, fp, sizeof(frame));
344 	mutex_enter(&p->p_smutex);
345 
346 	if (error != 0) {
347 		/*
348 		 * Process has trashed its stack; give it an illegal
349 		 * instruction to halt it in its tracks.
350 		 */
351 		sigexit(l, SIGILL);
352 		/* NOTREACHED */
353 	}
354 
355 	/*
356 	 * Build context to run handler in.
357 	 */
358 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
359 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
360 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
361 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
362 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
363 	    (linux_rt_sigcode - linux_sigcode);
364 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
365 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
366 	tf->tf_esp = (int)fp;
367 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
368 
369 	/* Remember that we're now on the signal stack. */
370 	if (onstack)
371 		sas->ss_flags |= SS_ONSTACK;
372 }
373 
374 static void
375 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
376 {
377 	struct lwp *l = curlwp;
378 	struct proc *p = l->l_proc;
379 	struct trapframe *tf;
380 	struct linux_sigframe *fp, frame;
381 	int onstack, error;
382 	int sig = ksi->ksi_signo;
383 	sig_t catcher = SIGACTION(p, sig).sa_handler;
384 	struct sigaltstack *sas = &l->l_sigstk;
385 
386 	tf = l->l_md.md_regs;
387 
388 	/* Do we need to jump onto the signal stack? */
389 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
390 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
391 
392 	/* Allocate space for the signal handler context. */
393 	if (onstack)
394 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
395 		    sas->ss_size);
396 	else
397 		fp = (struct linux_sigframe *)tf->tf_esp;
398 	fp--;
399 
400 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
401 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
402 
403 	/* Build stack frame for signal trampoline. */
404 	frame.sf_handler = catcher;
405 	frame.sf_sig = native_to_linux_signo[sig];
406 
407 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
408 	sendsig_reset(l, sig);
409 
410 	mutex_exit(&p->p_smutex);
411 	error = copyout(&frame, fp, sizeof(frame));
412 	mutex_enter(&p->p_smutex);
413 
414 	if (error != 0) {
415 		/*
416 		 * Process has trashed its stack; give it an illegal
417 		 * instruction to halt it in its tracks.
418 		 */
419 		sigexit(l, SIGILL);
420 		/* NOTREACHED */
421 	}
422 
423 	/*
424 	 * Build context to run handler in.
425 	 */
426 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
427 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
428 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
429 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
430 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
431 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
432 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
433 	tf->tf_esp = (int)fp;
434 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
435 
436 	/* Remember that we're now on the signal stack. */
437 	if (onstack)
438 		sas->ss_flags |= SS_ONSTACK;
439 }
440 
441 /*
442  * System call to cleanup state after a signal
443  * has been taken.  Reset signal mask and
444  * stack state from context left by sendsig (above).
445  * Return to previous pc and psl as specified by
446  * context left by sendsig. Check carefully to
447  * make sure that the user has not modified the
448  * psl to gain improper privileges or to cause
449  * a machine fault.
450  */
451 int
452 linux_sys_rt_sigreturn(l, v, retval)
453 	struct lwp *l;
454 	void *v;
455 	register_t *retval;
456 {
457 	struct linux_sys_rt_sigreturn_args /* {
458 		syscallarg(struct linux_ucontext *) ucp;
459 	} */ *uap = v;
460 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
461 	int error;
462 
463 	/*
464 	 * The trampoline code hands us the context.
465 	 * It is unsafe to keep track of it ourselves, in the event that a
466 	 * program jumps out of a signal handler.
467 	 */
468 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
469 		return error;
470 
471 	/* XXX XAX we can do better here by using more of the ucontext */
472 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
473 }
474 
475 int
476 linux_sys_sigreturn(l, v, retval)
477 	struct lwp *l;
478 	void *v;
479 	register_t *retval;
480 {
481 	struct linux_sys_sigreturn_args /* {
482 		syscallarg(struct linux_sigcontext *) scp;
483 	} */ *uap = v;
484 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
485 	int error;
486 
487 	/*
488 	 * The trampoline code hands us the context.
489 	 * It is unsafe to keep track of it ourselves, in the event that a
490 	 * program jumps out of a signal handler.
491 	 */
492 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
493 		return error;
494 	return linux_restore_sigcontext(l, &context, retval);
495 }
496 
497 static int
498 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
499     register_t *retval)
500 {
501 	struct proc *p = l->l_proc;
502 	struct sigaltstack *sas = &l->l_sigstk;
503 	struct trapframe *tf;
504 	sigset_t mask;
505 	ssize_t ss_gap;
506 	/* Restore register context. */
507 	tf = l->l_md.md_regs;
508 
509 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
510 #ifdef VM86
511 	if (scp->sc_eflags & PSL_VM) {
512 		void syscall_vm86 __P((struct trapframe *));
513 
514 		tf->tf_vm86_gs = scp->sc_gs;
515 		tf->tf_vm86_fs = scp->sc_fs;
516 		tf->tf_vm86_es = scp->sc_es;
517 		tf->tf_vm86_ds = scp->sc_ds;
518 		set_vflags(l, scp->sc_eflags);
519 		p->p_md.md_syscall = syscall_vm86;
520 	} else
521 #endif
522 	{
523 		/*
524 		 * Check for security violations.  If we're returning to
525 		 * protected mode, the CPU will validate the segment registers
526 		 * automatically and generate a trap on violations.  We handle
527 		 * the trap, rather than doing all of the checking here.
528 		 */
529 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
530 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
531 			return EINVAL;
532 
533 		tf->tf_gs = scp->sc_gs;
534 		tf->tf_fs = scp->sc_fs;
535 		tf->tf_es = scp->sc_es;
536 		tf->tf_ds = scp->sc_ds;
537 #ifdef VM86
538 		if (tf->tf_eflags & PSL_VM)
539 			(*p->p_emul->e_syscall_intern)(p);
540 #endif
541 		tf->tf_eflags = scp->sc_eflags;
542 	}
543 	tf->tf_edi = scp->sc_edi;
544 	tf->tf_esi = scp->sc_esi;
545 	tf->tf_ebp = scp->sc_ebp;
546 	tf->tf_ebx = scp->sc_ebx;
547 	tf->tf_edx = scp->sc_edx;
548 	tf->tf_ecx = scp->sc_ecx;
549 	tf->tf_eax = scp->sc_eax;
550 	tf->tf_eip = scp->sc_eip;
551 	tf->tf_cs = scp->sc_cs;
552 	tf->tf_esp = scp->sc_esp_at_signal;
553 	tf->tf_ss = scp->sc_ss;
554 
555 	/* Restore signal stack. */
556 	/*
557 	 * Linux really does it this way; it doesn't have space in sigframe
558 	 * to save the onstack flag.
559 	 */
560 	mutex_enter(&p->p_smutex);
561 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
562 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
563 		sas->ss_flags |= SS_ONSTACK;
564 	else
565 		sas->ss_flags &= ~SS_ONSTACK;
566 
567 	/* Restore signal mask. */
568 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
569 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
570 	mutex_exit(&p->p_smutex);
571 
572 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
573 	return EJUSTRETURN;
574 }
575 
576 #ifdef USER_LDT
577 
578 static int
579 linux_read_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
580     register_t *retval)
581 {
582 	struct x86_get_ldt_args gl;
583 	int error;
584 	int num_ldt;
585 	union descriptor *ldt_buf;
586 
587 	/*
588 	 * I've checked the linux code - this function is asymetric with
589 	 * linux_write_ldt, and returns raw ldt entries.
590 	 * NB, the code I saw zerod the spare parts of the user buffer.
591 	 */
592 
593 	DPRINTF(("linux_read_ldt!"));
594 
595 	num_ldt = x86_get_ldt_len(l);
596 	if (num_ldt <= 0)
597 		return EINVAL;
598 
599 	gl.start = 0;
600 	gl.desc = NULL;
601 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
602 
603 	if (gl.num > num_ldt)
604 		gl.num = num_ldt;
605 
606 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
607 
608 	error = x86_get_ldt1(l, &gl, ldt_buf);
609 	/* NB gl.num might have changed */
610 	if (error == 0) {
611 		*retval = gl.num * sizeof *ldt;
612 		error = copyout(ldt_buf, SCARG(uap, ptr),
613 		    gl.num * sizeof *ldt_buf);
614 	}
615 	free(ldt, M_TEMP);
616 
617 	return error;
618 }
619 
620 struct linux_ldt_info {	/* LDT write request, copied in from userland by linux_write_ldt() */
621 	u_int entry_number;	/* LDT slot to set; values >= 8192 are rejected */
622 	u_long base_addr;	/* segment base: low 24 bits -> sd_lobase, next 8 -> sd_hibase */
623 	u_int limit;		/* segment limit: low 16 bits -> sd_lolimit, next 4 -> sd_hilimit */
624 	u_int seg_32bit:1;	/* copied to sd_def32 */
625 	u_int contents:2;	/* placed in bits 2-3 of sd_type; 3 is rejected in old mode or if not present */
626 	u_int read_exec_only:1;	/* inverted into bit 1 of sd_type */
627 	u_int limit_in_pages:1;	/* copied to sd_gran */
628 	u_int seg_not_present:1;	/* inverted into sd_p */
629 	u_int useable:1;	/* copied to sd_xx, except in old mode where sd_xx is 0 */
630 };
631 
632 static int
633 linux_write_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
634     int oldmode)
635 {
636 	struct linux_ldt_info ldt_info;
637 	union descriptor d;
638 	struct x86_set_ldt_args sl;
639 	int error;
640 
641 	DPRINTF(("linux_write_ldt %d\n", oldmode));
642 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
643 		return (EINVAL);
644 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
645 		return error;
646 	if (ldt_info.entry_number >= 8192)
647 		return (EINVAL);
648 	if (ldt_info.contents == 3) {
649 		if (oldmode)
650 			return (EINVAL);
651 		if (ldt_info.seg_not_present)
652 			return (EINVAL);
653 	}
654 
655 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
656 	    (oldmode || (ldt_info.contents == 0 &&
657 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
658 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
659 	    ldt_info.useable == 0))) {
660 		/* this means you should zero the ldt */
661 		(void)memset(&d, 0, sizeof(d));
662 	} else {
663 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
664 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
665 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
666 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
667 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
668 		    (!ldt_info.read_exec_only << 1);
669 		d.sd.sd_dpl = SEL_UPL;
670 		d.sd.sd_p = !ldt_info.seg_not_present;
671 		d.sd.sd_def32 = ldt_info.seg_32bit;
672 		d.sd.sd_gran = ldt_info.limit_in_pages;
673 		if (!oldmode)
674 			d.sd.sd_xx = ldt_info.useable;
675 		else
676 			d.sd.sd_xx = 0;
677 	}
678 	sl.start = ldt_info.entry_number;
679 	sl.desc = NULL;;
680 	sl.num = 1;
681 
682 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
683 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
684 
685 	return x86_set_ldt1(l, &sl, &d);
686 }
687 
688 #endif /* USER_LDT */
689 
690 int
691 linux_sys_modify_ldt(struct lwp *l, void *v,
692     register_t *retval)
693 {
694 	struct linux_sys_modify_ldt_args /* {
695 		syscallarg(int) func;
696 		syscallarg(void *) ptr;
697 		syscallarg(size_t) bytecount;
698 	} */ *uap = v;
699 
700 	switch (SCARG(uap, func)) {
701 #ifdef USER_LDT
702 	case 0:
703 		return linux_read_ldt(l, uap, retval);
704 	case 1:
705 		return linux_write_ldt(l, uap, 1);
706 	case 2:
707 #ifdef notyet
708 		return (linux_read_default_ldt(l, uap, retval);
709 #else
710 		return (ENOSYS);
711 #endif
712 	case 0x11:
713 		return linux_write_ldt(l, uap, 0);
714 #endif /* USER_LDT */
715 
716 	default:
717 		return (ENOSYS);
718 	}
719 }
720 
721 /*
722  * XXX Pathetic hack to make svgalib work. This will fake the major
723  * device number of an opened VT so that svgalib likes it. grmbl.
724  * Should probably do it 'wrong the right way' and use a mapping
725  * array for all major device numbers, and map linux_mknod too.
726  */
727 dev_t
728 linux_fakedev(dev, raw)
729 	dev_t dev;
730 	int raw;
731 {
732 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
733 	const struct cdevsw *cd = cdevsw_lookup(dev);
734 
735 	if (raw) {
736 #if (NWSDISPLAY > 0)
737 		extern const struct cdevsw wsdisplay_cdevsw;
738 		if (cd == &wsdisplay_cdevsw)
739 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
740 #endif
741 	}
742 
743 	if (cd == &ptc_cdevsw)
744 		return makedev(LINUX_PTC_MAJOR, minor(dev));
745 	if (cd == &pts_cdevsw)
746 		return makedev(LINUX_PTS_MAJOR, minor(dev));
747 
748 	return dev;
749 }
750 
751 #if (NWSDISPLAY > 0)
752 /*
753  * That's not complete, but enough to get an X server running.
754  */
755 #define NR_KEYS 128	/* number of entries in each of the key tables below */
756 static const u_short plain_map[NR_KEYS] = {	/* linux_keytabs[0]; e.g. index 2 is 0x0031 ('1') */
757 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
758 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
759 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
760 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
761 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
762 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
763 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
764 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
765 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
766 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
767 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
768 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
769 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
770 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
771 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
772 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
773 }, shift_map[NR_KEYS] = {	/* linux_keytabs[1]; e.g. index 2 is 0x0021 ('!') */
774 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
775 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
776 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
777 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
778 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
779 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
780 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
781 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
782 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
783 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
784 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
785 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
786 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
787 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
788 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
789 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
790 }, altgr_map[NR_KEYS] = {	/* used for both linux_keytabs[2] and linux_keytabs[3] */
791 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
792 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
793 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
794 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
795 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
796 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
797 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
798 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
799 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
800 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
801 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
802 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
803 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
804 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
805 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
806 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
807 }, ctrl_map[NR_KEYS] = {	/* linux_keytabs[4] */
808 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
809 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
810 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
811 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
812 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
813 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
814 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
815 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
816 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
817 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
818 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
819 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
820 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
821 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
822 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
823 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
824 };
825 
826 const u_short * const linux_keytabs[] = {
827 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
828 };
829 #endif
830 
831 static struct biosdisk_info *
832 fd2biosinfo(struct proc *p, struct file *fp)
833 {
834 	struct vnode *vp;
835 	const char *blkname;
836 	char diskname[16];
837 	int i;
838 	struct nativedisk_info *nip;
839 	struct disklist *dl = x86_alldisks;
840 
841 	if (fp->f_type != DTYPE_VNODE)
842 		return NULL;
843 	vp = (struct vnode *)fp->f_data;
844 
845 	if (vp->v_type != VBLK)
846 		return NULL;
847 
848 	blkname = devsw_blk2name(major(vp->v_rdev));
849 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
850 	    DISKUNIT(vp->v_rdev));
851 
852 	for (i = 0; i < dl->dl_nnativedisks; i++) {
853 		nip = &dl->dl_nativedisks[i];
854 		if (strcmp(diskname, nip->ni_devname))
855 			continue;
856 		if (nip->ni_nmatches != 0)
857 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
858 	}
859 
860 	return NULL;
861 }
862 
863 
864 /*
865  * We come here in a last attempt to satisfy a Linux ioctl() call
866  */
867 int
868 linux_machdepioctl(l, v, retval)
869 	struct lwp *l;
870 	void *v;
871 	register_t *retval;
872 {
873 	struct linux_sys_ioctl_args /* {
874 		syscallarg(int) fd;
875 		syscallarg(u_long) com;
876 		syscallarg(void *) data;
877 	} */ *uap = v;
878 	struct sys_ioctl_args bia;
879 	u_long com;
880 	int error, error1;
881 #if (NWSDISPLAY > 0)
882 	struct vt_mode lvt;
883 	struct kbentry kbe;
884 #endif
885 	struct linux_hd_geometry hdg;
886 	struct linux_hd_big_geometry hdg_big;
887 	struct biosdisk_info *bip;
888 	struct filedesc *fdp;
889 	struct file *fp;
890 	int fd;
891 	struct disklabel label, *labp;
892 	struct partinfo partp;
893 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
894 	u_long start, biostotal, realtotal;
895 	u_char heads, sectors;
896 	u_int cylinders;
897 	struct ioctl_pt pt;
898 	struct proc *p = l->l_proc;
899 
900 	fd = SCARG(uap, fd);
901 	SCARG(&bia, fd) = fd;
902 	SCARG(&bia, data) = SCARG(uap, data);
903 	com = SCARG(uap, com);
904 
905 	fdp = p->p_fd;
906 
907 	if ((fp = fd_getfile(fdp, fd)) == NULL)
908 		return (EBADF);
909 
910 	FILE_USE(fp);
911 
912 	switch (com) {
913 #if (NWSDISPLAY > 0)
914 	case LINUX_KDGKBMODE:
915 		com = KDGKBMODE;
916 		break;
917 	case LINUX_KDSKBMODE:
918 		com = KDSKBMODE;
919 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
920 			SCARG(&bia, data) = (void *)K_RAW;
921 		break;
922 	case LINUX_KIOCSOUND:
923 		SCARG(&bia, data) =
924 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
925 		/* fall through */
926 	case LINUX_KDMKTONE:
927 		com = KDMKTONE;
928 		break;
929 	case LINUX_KDSETMODE:
930 		com = KDSETMODE;
931 		break;
932 	case LINUX_KDGETMODE:
933 		/* KD_* values are equal to the wscons numbers */
934 		com = WSDISPLAYIO_GMODE;
935 		break;
936 	case LINUX_KDENABIO:
937 		com = KDENABIO;
938 		break;
939 	case LINUX_KDDISABIO:
940 		com = KDDISABIO;
941 		break;
942 	case LINUX_KDGETLED:
943 		com = KDGETLED;
944 		break;
945 	case LINUX_KDSETLED:
946 		com = KDSETLED;
947 		break;
948 	case LINUX_VT_OPENQRY:
949 		com = VT_OPENQRY;
950 		break;
951 	case LINUX_VT_GETMODE:
952 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt, l);
953 		if (error != 0)
954 			goto out;
955 		lvt.relsig = native_to_linux_signo[lvt.relsig];
956 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
957 		lvt.frsig = native_to_linux_signo[lvt.frsig];
958 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
959 		goto out;
960 	case LINUX_VT_SETMODE:
961 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
962 		if (error != 0)
963 			goto out;
964 		lvt.relsig = linux_to_native_signo[lvt.relsig];
965 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
966 		lvt.frsig = linux_to_native_signo[lvt.frsig];
967 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt, l);
968 		goto out;
969 	case LINUX_VT_DISALLOCATE:
970 		/* XXX should use WSDISPLAYIO_DELSCREEN */
971 		error = 0;
972 		goto out;
973 	case LINUX_VT_RELDISP:
974 		com = VT_RELDISP;
975 		break;
976 	case LINUX_VT_ACTIVATE:
977 		com = VT_ACTIVATE;
978 		break;
979 	case LINUX_VT_WAITACTIVE:
980 		com = VT_WAITACTIVE;
981 		break;
982 	case LINUX_VT_GETSTATE:
983 		com = VT_GETSTATE;
984 		break;
985 	case LINUX_KDGKBTYPE:
986 	    {
987 		static const u_int8_t kb101 = KB_101;
988 
989 		/* This is what Linux does. */
990 		error = copyout(&kb101, SCARG(uap, data), 1);
991 		goto out;
992 	    }
993 	case LINUX_KDGKBENT:
994 		/*
995 		 * The Linux KDGKBENT ioctl is different from the
996 		 * SYSV original. So we handle it in machdep code.
997 		 * XXX We should use keyboard mapping information
998 		 * from wsdisplay, but this would be expensive.
999 		 */
1000 		if ((error = copyin(SCARG(uap, data), &kbe,
1001 				    sizeof(struct kbentry))))
1002 			goto out;
1003 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
1004 		    || kbe.kb_index >= NR_KEYS) {
1005 			error = EINVAL;
1006 			goto out;
1007 		}
1008 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
1009 		error = copyout(&kbe, SCARG(uap, data),
1010 				sizeof(struct kbentry));
1011 		goto out;
1012 #endif
1013 	case LINUX_HDIO_GETGEO:
1014 	case LINUX_HDIO_GETGEO_BIG:
1015 		/*
1016 		 * Try to mimic Linux behaviour: return the BIOS geometry
1017 		 * if possible (extending its # of cylinders if it's beyond
1018 		 * the 1023 limit), fall back to the MI geometry (i.e.
1019 		 * the real geometry) if not found, by returning an
1020 		 * error. See common/linux_hdio.c
1021 		 */
1022 		bip = fd2biosinfo(p, fp);
1023 		ioctlf = fp->f_ops->fo_ioctl;
1024 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label, l);
1025 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp, l);
1026 		if (error != 0 && error1 != 0) {
1027 			error = error1;
1028 			goto out;
1029 		}
1030 		labp = error != 0 ? &label : partp.disklab;
1031 		start = error1 != 0 ? partp.part->p_offset : 0;
1032 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1033 		    && bip->bi_cyl != 0) {
1034 			heads = bip->bi_head;
1035 			sectors = bip->bi_sec;
1036 			cylinders = bip->bi_cyl;
1037 			biostotal = heads * sectors * cylinders;
1038 			realtotal = labp->d_ntracks * labp->d_nsectors *
1039 			    labp->d_ncylinders;
1040 			if (realtotal > biostotal)
1041 				cylinders = realtotal / (heads * sectors);
1042 		} else {
1043 			heads = labp->d_ntracks;
1044 			cylinders = labp->d_ncylinders;
1045 			sectors = labp->d_nsectors;
1046 		}
1047 		if (com == LINUX_HDIO_GETGEO) {
1048 			hdg.start = start;
1049 			hdg.heads = heads;
1050 			hdg.cylinders = cylinders;
1051 			hdg.sectors = sectors;
1052 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1053 			goto out;
1054 		} else {
1055 			hdg_big.start = start;
1056 			hdg_big.heads = heads;
1057 			hdg_big.cylinders = cylinders;
1058 			hdg_big.sectors = sectors;
1059 			error = copyout(&hdg_big, SCARG(uap, data),
1060 			    sizeof hdg_big);
1061 			goto out;
1062 		}
1063 
1064 	default:
1065 		/*
1066 		 * Unknown to us. If it's on a device, just pass it through
1067 		 * using PTIOCLINUX, the device itself might be able to
1068 		 * make some sense of it.
1069 		 * XXX hack: if the function returns EJUSTRETURN,
1070 		 * it has stuffed a sysctl return value in pt.data.
1071 		 */
1072 		ioctlf = fp->f_ops->fo_ioctl;
1073 		pt.com = SCARG(uap, com);
1074 		pt.data = SCARG(uap, data);
1075 		error = ioctlf(fp, PTIOCLINUX, (void *)&pt, l);
1076 		if (error == EJUSTRETURN) {
1077 			retval[0] = (register_t)pt.data;
1078 			error = 0;
1079 		}
1080 
1081 		if (error == ENOTTY) {
1082 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1083 			    com));
1084 		}
1085 		goto out;
1086 	}
1087 	SCARG(&bia, com) = com;
1088 	/* XXX NJWLWP */
1089 	error = sys_ioctl(curlwp, &bia, retval);
1090 out:
1091 	FILE_UNUSE(fp ,l);
1092 	return error;
1093 }
1094 
1095 /*
1096  * Set I/O permissions for a process. Just set the maximum level
1097  * right away (ignoring the argument), otherwise we would have
1098  * to rely on I/O permission maps, which are not implemented.
1099  */
1100 int
1101 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
1102 {
1103 #if 0
1104 	struct linux_sys_iopl_args /* {
1105 		syscallarg(int) level;
1106 	} */ *uap = v;
1107 #endif
1108 	struct trapframe *fp = l->l_md.md_regs;
1109 
1110 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1111 	    NULL, NULL, NULL, NULL) != 0)
1112 		return EPERM;
1113 	fp->tf_eflags |= PSL_IOPL;
1114 	*retval = 0;
1115 	return 0;
1116 }
1117 
1118 /*
1119  * See above. If a root process tries to set access to an I/O port,
1120  * just let it have the whole range.
1121  */
1122 int
1123 linux_sys_ioperm(l, v, retval)
1124 	struct lwp *l;
1125 	void *v;
1126 	register_t *retval;
1127 {
1128 	struct linux_sys_ioperm_args /* {
1129 		syscallarg(unsigned int) lo;
1130 		syscallarg(unsigned int) hi;
1131 		syscallarg(int) val;
1132 	} */ *uap = v;
1133 	struct trapframe *fp = l->l_md.md_regs;
1134 
1135 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1136 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1137 	    NULL, NULL) != 0)
1138 		return EPERM;
1139 	if (SCARG(uap, val))
1140 		fp->tf_eflags |= PSL_IOPL;
1141 	*retval = 0;
1142 	return 0;
1143 }
1144 
1145 int
1146 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1147     void *arg)
1148 {
1149 	return 0;
1150 }
1151 
1152 const char *
1153 linux_get_uname_arch(void)
1154 {
1155 	static char uname_arch[5] = "i386";
1156 
1157 	if (uname_arch[1] == '3')
1158 		uname_arch[1] += cpu_class;
1159 	return uname_arch;
1160 }
1161 
1162 #ifdef LINUX_NPTL
/*
 * linux_get_newtls: return the new TLS pointer for an lwp.
 *
 * Not implemented yet: the lwp is not examined and NULL is always
 * returned.
 */
void *
linux_get_newtls(struct lwp *l)
{

	/* XXX: Implement me */
	return NULL;
}
1172 
/*
 * linux_set_newtls: install a new TLS pointer for an lwp.
 *
 * Not implemented yet: both arguments are ignored and 0 is always
 * returned.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{

	/* XXX: Implement me */
	return 0;
}
1181 #endif
1182