xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision ce2c90c7c172d95d2402a5b3d96d8f8e6d138a21)
1 /*	$NetBSD: linux_machdep.c,v 1.117 2006/10/14 18:57:06 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.117 2006/10/14 18:57:06 christos Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/sa.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <sys/wait.h>
72 #include <sys/kauth.h>
73 
74 #include <miscfs/specfs/specdev.h>
75 
76 #include <compat/linux/common/linux_types.h>
77 #include <compat/linux/common/linux_signal.h>
78 #include <compat/linux/common/linux_util.h>
79 #include <compat/linux/common/linux_ioctl.h>
80 #include <compat/linux/common/linux_hdio.h>
81 #include <compat/linux/common/linux_exec.h>
82 #include <compat/linux/common/linux_machdep.h>
83 #include <compat/linux/common/linux_errno.h>
84 
85 #include <compat/linux/linux_syscallargs.h>
86 
87 #include <machine/cpu.h>
88 #include <machine/cpufunc.h>
89 #include <machine/psl.h>
90 #include <machine/reg.h>
91 #include <machine/segments.h>
92 #include <machine/specialreg.h>
93 #include <machine/sysarch.h>
94 #include <machine/vm86.h>
95 #include <machine/vmparam.h>
96 
97 /*
98  * To see whether wscons is configured (for virtual console ioctl calls).
99  */
100 #if defined(_KERNEL_OPT)
101 #include "wsdisplay.h"
102 #endif
103 #if (NWSDISPLAY > 0)
104 #include <dev/wscons/wsconsio.h>
105 #include <dev/wscons/wsdisplay_usl_io.h>
106 #if defined(_KERNEL_OPT)
107 #include "opt_xserver.h"
108 #endif
109 #endif
110 
111 #ifdef USER_LDT
112 #include <machine/cpu.h>
/*
 * LDT helpers dispatched from linux_sys_modify_ldt(); they are only
 * declared (and defined) when the kernel is configured with USER_LDT.
 */
int linux_read_ldt(struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *);
int linux_write_ldt(struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *);
117 #endif
118 
/*
 * Debug tracing.  DPRINTF takes one fully parenthesised printf-style
 * argument list, e.g. DPRINTF(("x = %d\n", x)).  With DEBUG_LINUX defined
 * that list is handed to uprintf; otherwise the macro expands to nothing,
 * so its arguments are never evaluated.
 */
#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a) uprintf a
#endif
124 
125 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
126 extern struct disklist *x86_alldisks;
127 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
128     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
129 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
130     const sigset_t *, struct linux_sigcontext *));
131 static int linux_restore_sigcontext __P((struct lwp *,
132     struct linux_sigcontext *, register_t *));
133 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
134 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
135 
136 extern char linux_sigcode[], linux_rt_sigcode[];
137 /*
138  * Deal with some i386-specific things in the Linux emulation code.
139  */
140 
141 void
142 linux_setregs(l, epp, stack)
143 	struct lwp *l;
144 	struct exec_package *epp;
145 	u_long stack;
146 {
147 	struct pcb *pcb = &l->l_addr->u_pcb;
148 	struct trapframe *tf;
149 
150 #if NNPX > 0
151 	/* If we were using the FPU, forget about it. */
152 	if (npxproc == l)
153 		npxdrop();
154 #endif
155 
156 #ifdef USER_LDT
157 	pmap_ldt_cleanup(l);
158 #endif
159 
160 	l->l_md.md_flags &= ~MDL_USEDFPU;
161 
162 	if (i386_use_fxsave) {
163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
164 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
165 	} else
166 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
167 
168 	tf = l->l_md.md_regs;
169 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
170 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
171 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
172 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
173 	tf->tf_edi = 0;
174 	tf->tf_esi = 0;
175 	tf->tf_ebp = 0;
176 	tf->tf_ebx = (int)l->l_proc->p_psstr;
177 	tf->tf_edx = 0;
178 	tf->tf_ecx = 0;
179 	tf->tf_eax = 0;
180 	tf->tf_eip = epp->ep_entry;
181 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
182 	tf->tf_eflags = PSL_USERSET;
183 	tf->tf_esp = stack;
184 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
185 }
186 
187 /*
188  * Send an interrupt to process.
189  *
190  * Stack is set up to allow sigcode stored
191  * in u. to call routine, followed by kcall
192  * to sigreturn routine below.  After sigreturn
193  * resets the signal mask, the stack, and the
194  * frame pointer, it returns to the user
195  * specified pc, psl.
196  */
197 
198 void
199 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
200 {
201 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
202 		linux_rt_sendsig(ksi, mask);
203 	else
204 		linux_old_sendsig(ksi, mask);
205 }
206 
207 
208 static void
209 linux_save_ucontext(l, tf, mask, sas, uc)
210 	struct lwp *l;
211 	struct trapframe *tf;
212 	const sigset_t *mask;
213 	struct sigaltstack *sas;
214 	struct linux_ucontext *uc;
215 {
216 	uc->uc_flags = 0;
217 	uc->uc_link = NULL;
218 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
219 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
220 	native_to_linux_sigset(&uc->uc_sigmask, mask);
221 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
222 }
223 
224 static void
225 linux_save_sigcontext(l, tf, mask, sc)
226 	struct lwp *l;
227 	struct trapframe *tf;
228 	const sigset_t *mask;
229 	struct linux_sigcontext *sc;
230 {
231 	/* Save register context. */
232 #ifdef VM86
233 	if (tf->tf_eflags & PSL_VM) {
234 		sc->sc_gs = tf->tf_vm86_gs;
235 		sc->sc_fs = tf->tf_vm86_fs;
236 		sc->sc_es = tf->tf_vm86_es;
237 		sc->sc_ds = tf->tf_vm86_ds;
238 		sc->sc_eflags = get_vflags(l);
239 	} else
240 #endif
241 	{
242 		sc->sc_gs = tf->tf_gs;
243 		sc->sc_fs = tf->tf_fs;
244 		sc->sc_es = tf->tf_es;
245 		sc->sc_ds = tf->tf_ds;
246 		sc->sc_eflags = tf->tf_eflags;
247 	}
248 	sc->sc_edi = tf->tf_edi;
249 	sc->sc_esi = tf->tf_esi;
250 	sc->sc_esp = tf->tf_esp;
251 	sc->sc_ebp = tf->tf_ebp;
252 	sc->sc_ebx = tf->tf_ebx;
253 	sc->sc_edx = tf->tf_edx;
254 	sc->sc_ecx = tf->tf_ecx;
255 	sc->sc_eax = tf->tf_eax;
256 	sc->sc_eip = tf->tf_eip;
257 	sc->sc_cs = tf->tf_cs;
258 	sc->sc_esp_at_signal = tf->tf_esp;
259 	sc->sc_ss = tf->tf_ss;
260 	sc->sc_err = tf->tf_err;
261 	sc->sc_trapno = tf->tf_trapno;
262 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
263 	sc->sc_387 = NULL;
264 
265 	/* Save signal stack. */
266 	/* Linux doesn't save the onstack flag in sigframe */
267 
268 	/* Save signal mask. */
269 	native_to_linux_old_sigset(&sc->sc_mask, mask);
270 }
271 
272 static void
273 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
274 {
275 	struct lwp *l = curlwp;
276 	struct proc *p = l->l_proc;
277 	struct trapframe *tf;
278 	struct linux_rt_sigframe *fp, frame;
279 	int onstack;
280 	linux_siginfo_t *lsi;
281 	int sig = ksi->ksi_signo;
282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
283 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
284 
285 	tf = l->l_md.md_regs;
286 	/* Do we need to jump onto the signal stack? */
287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
289 
290 
291 	/* Allocate space for the signal handler context. */
292 	if (onstack)
293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
294 		    sas->ss_size);
295 	else
296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
297 	fp--;
298 
299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
301 
302 	/* Build stack frame for signal trampoline. */
303 	frame.sf_handler = catcher;
304 	frame.sf_sig = native_to_linux_signo[sig];
305 	frame.sf_sip = &fp->sf_si;
306 	frame.sf_ucp = &fp->sf_uc;
307 
308 	/*
309 	 * XXX: the following code assumes that the constants for
310 	 * siginfo are the same between linux and NetBSD.
311 	 */
312 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
313 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
314 	lsi->lsi_code = ksi->ksi_code;
315 	switch (lsi->lsi_signo = frame.sf_sig) {
316 	case LINUX_SIGILL:
317 	case LINUX_SIGFPE:
318 	case LINUX_SIGSEGV:
319 	case LINUX_SIGBUS:
320 	case LINUX_SIGTRAP:
321 		lsi->lsi_addr = ksi->ksi_addr;
322 		break;
323 	case LINUX_SIGCHLD:
324 		lsi->lsi_uid = ksi->ksi_uid;
325 		lsi->lsi_pid = ksi->ksi_pid;
326 		lsi->lsi_utime = ksi->ksi_utime;
327 		lsi->lsi_stime = ksi->ksi_stime;
328 
329 		/* We use the same codes */
330 		lsi->lsi_code = ksi->ksi_code;
331 		/* XXX is that right? */
332 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
333 		break;
334 	case LINUX_SIGIO:
335 		lsi->lsi_band = ksi->ksi_band;
336 		lsi->lsi_fd = ksi->ksi_fd;
337 		break;
338 	default:
339 		lsi->lsi_uid = ksi->ksi_uid;
340 		lsi->lsi_pid = ksi->ksi_pid;
341 		if (lsi->lsi_signo == LINUX_SIGALRM ||
342 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
343 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
344 		break;
345 	}
346 
347 	/* Save register context. */
348 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
349 
350 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
351 		/*
352 		 * Process has trashed its stack; give it an illegal
353 		 * instruction to halt it in its tracks.
354 		 */
355 		sigexit(l, SIGILL);
356 		/* NOTREACHED */
357 	}
358 
359 	/*
360 	 * Build context to run handler in.
361 	 */
362 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
363 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
364 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
365 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
366 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
367 	    (linux_rt_sigcode - linux_sigcode);
368 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
369 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
370 	tf->tf_esp = (int)fp;
371 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
372 
373 	/* Remember that we're now on the signal stack. */
374 	if (onstack)
375 		sas->ss_flags |= SS_ONSTACK;
376 }
377 
378 static void
379 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
380 {
381 	struct lwp *l = curlwp;
382 	struct proc *p = l->l_proc;
383 	struct trapframe *tf;
384 	struct linux_sigframe *fp, frame;
385 	int onstack;
386 	int sig = ksi->ksi_signo;
387 	sig_t catcher = SIGACTION(p, sig).sa_handler;
388 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
389 
390 	tf = l->l_md.md_regs;
391 
392 	/* Do we need to jump onto the signal stack? */
393 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
394 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
395 
396 	/* Allocate space for the signal handler context. */
397 	if (onstack)
398 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
399 		    sas->ss_size);
400 	else
401 		fp = (struct linux_sigframe *)tf->tf_esp;
402 	fp--;
403 
404 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
405 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
406 
407 	/* Build stack frame for signal trampoline. */
408 	frame.sf_handler = catcher;
409 	frame.sf_sig = native_to_linux_signo[sig];
410 
411 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
412 
413 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
414 		/*
415 		 * Process has trashed its stack; give it an illegal
416 		 * instruction to halt it in its tracks.
417 		 */
418 		sigexit(l, SIGILL);
419 		/* NOTREACHED */
420 	}
421 
422 	/*
423 	 * Build context to run handler in.
424 	 */
425 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
426 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
427 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
428 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
429 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
430 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
431 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
432 	tf->tf_esp = (int)fp;
433 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
434 
435 	/* Remember that we're now on the signal stack. */
436 	if (onstack)
437 		sas->ss_flags |= SS_ONSTACK;
438 }
439 
440 /*
441  * System call to cleanup state after a signal
442  * has been taken.  Reset signal mask and
443  * stack state from context left by sendsig (above).
444  * Return to previous pc and psl as specified by
445  * context left by sendsig. Check carefully to
446  * make sure that the user has not modified the
447  * psl to gain improper privileges or to cause
448  * a machine fault.
449  */
450 int
451 linux_sys_rt_sigreturn(l, v, retval)
452 	struct lwp *l;
453 	void *v;
454 	register_t *retval;
455 {
456 	struct linux_sys_rt_sigreturn_args /* {
457 		syscallarg(struct linux_ucontext *) ucp;
458 	} */ *uap = v;
459 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
460 	int error;
461 
462 	/*
463 	 * The trampoline code hands us the context.
464 	 * It is unsafe to keep track of it ourselves, in the event that a
465 	 * program jumps out of a signal handler.
466 	 */
467 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
468 		return error;
469 
470 	/* XXX XAX we can do better here by using more of the ucontext */
471 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
472 }
473 
474 int
475 linux_sys_sigreturn(l, v, retval)
476 	struct lwp *l;
477 	void *v;
478 	register_t *retval;
479 {
480 	struct linux_sys_sigreturn_args /* {
481 		syscallarg(struct linux_sigcontext *) scp;
482 	} */ *uap = v;
483 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
484 	int error;
485 
486 	/*
487 	 * The trampoline code hands us the context.
488 	 * It is unsafe to keep track of it ourselves, in the event that a
489 	 * program jumps out of a signal handler.
490 	 */
491 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
492 		return error;
493 	return linux_restore_sigcontext(l, &context, retval);
494 }
495 
496 static int
497 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
498     register_t *retval __unused)
499 {
500 	struct proc *p = l->l_proc;
501 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
502 	struct trapframe *tf;
503 	sigset_t mask;
504 	ssize_t ss_gap;
505 	/* Restore register context. */
506 	tf = l->l_md.md_regs;
507 
508 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
509 #ifdef VM86
510 	if (scp->sc_eflags & PSL_VM) {
511 		void syscall_vm86 __P((struct trapframe *));
512 
513 		tf->tf_vm86_gs = scp->sc_gs;
514 		tf->tf_vm86_fs = scp->sc_fs;
515 		tf->tf_vm86_es = scp->sc_es;
516 		tf->tf_vm86_ds = scp->sc_ds;
517 		set_vflags(l, scp->sc_eflags);
518 		p->p_md.md_syscall = syscall_vm86;
519 	} else
520 #endif
521 	{
522 		/*
523 		 * Check for security violations.  If we're returning to
524 		 * protected mode, the CPU will validate the segment registers
525 		 * automatically and generate a trap on violations.  We handle
526 		 * the trap, rather than doing all of the checking here.
527 		 */
528 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
529 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
530 			return EINVAL;
531 
532 		tf->tf_gs = scp->sc_gs;
533 		tf->tf_fs = scp->sc_fs;
534 		tf->tf_es = scp->sc_es;
535 		tf->tf_ds = scp->sc_ds;
536 #ifdef VM86
537 		if (tf->tf_eflags & PSL_VM)
538 			(*p->p_emul->e_syscall_intern)(p);
539 #endif
540 		tf->tf_eflags = scp->sc_eflags;
541 	}
542 	tf->tf_edi = scp->sc_edi;
543 	tf->tf_esi = scp->sc_esi;
544 	tf->tf_ebp = scp->sc_ebp;
545 	tf->tf_ebx = scp->sc_ebx;
546 	tf->tf_edx = scp->sc_edx;
547 	tf->tf_ecx = scp->sc_ecx;
548 	tf->tf_eax = scp->sc_eax;
549 	tf->tf_eip = scp->sc_eip;
550 	tf->tf_cs = scp->sc_cs;
551 	tf->tf_esp = scp->sc_esp_at_signal;
552 	tf->tf_ss = scp->sc_ss;
553 
554 	/* Restore signal stack. */
555 	/*
556 	 * Linux really does it this way; it doesn't have space in sigframe
557 	 * to save the onstack flag.
558 	 */
559 	ss_gap = (ssize_t)
560 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
561 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
562 		sas->ss_flags |= SS_ONSTACK;
563 	else
564 		sas->ss_flags &= ~SS_ONSTACK;
565 
566 	/* Restore signal mask. */
567 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
568 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
569 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
570 	return EJUSTRETURN;
571 }
572 
573 #ifdef USER_LDT
574 
575 int
576 linux_read_ldt(l, uap, retval)
577 	struct lwp *l;
578 	struct linux_sys_modify_ldt_args /* {
579 		syscallarg(int) func;
580 		syscallarg(void *) ptr;
581 		syscallarg(size_t) bytecount;
582 	} */ *uap;
583 	register_t *retval;
584 {
585 	struct proc *p = l->l_proc;
586 	struct i386_get_ldt_args gl;
587 	int error;
588 	caddr_t sg;
589 	char *parms;
590 
591 	DPRINTF(("linux_read_ldt!"));
592 	sg = stackgap_init(p, 0);
593 
594 	gl.start = 0;
595 	gl.desc = SCARG(uap, ptr);
596 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
597 
598 	parms = stackgap_alloc(p, &sg, sizeof(gl));
599 
600 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
601 		return (error);
602 
603 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
604 		return (error);
605 
606 	*retval *= sizeof(union descriptor);
607 	return (0);
608 }
609 
610 struct linux_ldt_info {
611 	u_int entry_number;
612 	u_long base_addr;
613 	u_int limit;
614 	u_int seg_32bit:1;
615 	u_int contents:2;
616 	u_int read_exec_only:1;
617 	u_int limit_in_pages:1;
618 	u_int seg_not_present:1;
619 	u_int useable:1;
620 };
621 
622 int
623 linux_write_ldt(l, uap, retval)
624 	struct lwp *l;
625 	struct linux_sys_modify_ldt_args /* {
626 		syscallarg(int) func;
627 		syscallarg(void *) ptr;
628 		syscallarg(size_t) bytecount;
629 	} */ *uap;
630 	register_t *retval;
631 {
632 	struct proc *p = l->l_proc;
633 	struct linux_ldt_info ldt_info;
634 	struct segment_descriptor sd;
635 	struct i386_set_ldt_args sl;
636 	int error;
637 	caddr_t sg;
638 	char *parms;
639 	int oldmode = (int)retval[0];
640 
641 	DPRINTF(("linux_write_ldt %d\n", oldmode));
642 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
643 		return (EINVAL);
644 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
645 		return error;
646 	if (ldt_info.entry_number >= 8192)
647 		return (EINVAL);
648 	if (ldt_info.contents == 3) {
649 		if (oldmode)
650 			return (EINVAL);
651 		if (ldt_info.seg_not_present)
652 			return (EINVAL);
653 	}
654 
655 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
656 	    (oldmode || (ldt_info.contents == 0 &&
657 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
658 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
659 	    ldt_info.useable == 0))) {
660 		/* this means you should zero the ldt */
661 		(void)memset(&sd, 0, sizeof(sd));
662 	} else {
663 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
664 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
665 		sd.sd_lolimit = ldt_info.limit & 0xffff;
666 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
667 		sd.sd_type = 16 | (ldt_info.contents << 2) |
668 		    (!ldt_info.read_exec_only << 1);
669 		sd.sd_dpl = SEL_UPL;
670 		sd.sd_p = !ldt_info.seg_not_present;
671 		sd.sd_def32 = ldt_info.seg_32bit;
672 		sd.sd_gran = ldt_info.limit_in_pages;
673 		if (!oldmode)
674 			sd.sd_xx = ldt_info.useable;
675 		else
676 			sd.sd_xx = 0;
677 	}
678 	sg = stackgap_init(p, 0);
679 	sl.start = ldt_info.entry_number;
680 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
681 	sl.num = 1;
682 
683 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
684 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
685 
686 	parms = stackgap_alloc(p, &sg, sizeof(sl));
687 
688 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
689 		return (error);
690 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
691 		return (error);
692 
693 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
694 		return (error);
695 
696 	*retval = 0;
697 	return (0);
698 }
699 
700 #endif /* USER_LDT */
701 
702 int
703 linux_sys_modify_ldt(struct lwp *l __unused, void *v,
704     register_t *retval __unused)
705 {
706 	struct linux_sys_modify_ldt_args /* {
707 		syscallarg(int) func;
708 		syscallarg(void *) ptr;
709 		syscallarg(size_t) bytecount;
710 	} */ *uap = v;
711 
712 	switch (SCARG(uap, func)) {
713 #ifdef USER_LDT
714 	case 0:
715 		return linux_read_ldt(l, uap, retval);
716 	case 1:
717 		retval[0] = 1;
718 		return linux_write_ldt(l, uap, retval);
719 	case 2:
720 #ifdef notyet
721 		return (linux_read_default_ldt(l, uap, retval);
722 #else
723 		return (ENOSYS);
724 #endif
725 	case 0x11:
726 		retval[0] = 0;
727 		return linux_write_ldt(l, uap, retval);
728 #endif /* USER_LDT */
729 
730 	default:
731 		return (ENOSYS);
732 	}
733 }
734 
735 /*
736  * XXX Pathetic hack to make svgalib work. This will fake the major
737  * device number of an opened VT so that svgalib likes it. grmbl.
738  * Should probably do it 'wrong the right way' and use a mapping
739  * array for all major device numbers, and map linux_mknod too.
740  */
741 dev_t
742 linux_fakedev(dev, raw)
743 	dev_t dev;
744 	int raw;
745 {
746 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
747 	const struct cdevsw *cd = cdevsw_lookup(dev);
748 
749 	if (raw) {
750 #if (NWSDISPLAY > 0)
751 		extern const struct cdevsw wsdisplay_cdevsw;
752 		if (cd == &wsdisplay_cdevsw)
753 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
754 #endif
755 	}
756 
757 	if (cd == &ptc_cdevsw)
758 		return makedev(LINUX_PTC_MAJOR, minor(dev));
759 	if (cd == &pts_cdevsw)
760 		return makedev(LINUX_PTS_MAJOR, minor(dev));
761 
762 	return dev;
763 }
764 
765 #if (NWSDISPLAY > 0)
766 /*
767  * That's not complete, but enough to get an X server running.
768  */
769 #define NR_KEYS 128
770 static const u_short plain_map[NR_KEYS] = {
771 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
772 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
773 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
774 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
775 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
776 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
777 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
778 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
779 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
780 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
781 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
782 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
783 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
784 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
785 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
786 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
787 }, shift_map[NR_KEYS] = {
788 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
789 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
790 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
791 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
792 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
793 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
794 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
795 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
796 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
797 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
798 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
799 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
800 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
801 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
802 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
803 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
804 }, altgr_map[NR_KEYS] = {
805 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
806 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
807 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
808 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
809 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
810 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
811 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
812 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
813 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
814 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
815 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
816 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
817 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
818 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
819 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
820 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
821 }, ctrl_map[NR_KEYS] = {
822 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
823 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
824 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
825 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
826 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
827 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
828 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
829 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
830 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
831 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
832 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
833 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
834 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
835 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
836 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
837 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
838 };
839 
840 const u_short * const linux_keytabs[] = {
841 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
842 };
843 #endif
844 
845 static struct biosdisk_info *
846 fd2biosinfo(struct proc *p __unused, struct file *fp)
847 {
848 	struct vnode *vp;
849 	const char *blkname;
850 	char diskname[16];
851 	int i;
852 	struct nativedisk_info *nip;
853 	struct disklist *dl = x86_alldisks;
854 
855 	if (fp->f_type != DTYPE_VNODE)
856 		return NULL;
857 	vp = (struct vnode *)fp->f_data;
858 
859 	if (vp->v_type != VBLK)
860 		return NULL;
861 
862 	blkname = devsw_blk2name(major(vp->v_rdev));
863 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
864 	    DISKUNIT(vp->v_rdev));
865 
866 	for (i = 0; i < dl->dl_nnativedisks; i++) {
867 		nip = &dl->dl_nativedisks[i];
868 		if (strcmp(diskname, nip->ni_devname))
869 			continue;
870 		if (nip->ni_nmatches != 0)
871 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
872 	}
873 
874 	return NULL;
875 }
876 
877 
878 /*
879  * We come here in a last attempt to satisfy a Linux ioctl() call
880  */
881 int
882 linux_machdepioctl(l, v, retval)
883 	struct lwp *l;
884 	void *v;
885 	register_t *retval;
886 {
887 	struct linux_sys_ioctl_args /* {
888 		syscallarg(int) fd;
889 		syscallarg(u_long) com;
890 		syscallarg(caddr_t) data;
891 	} */ *uap = v;
892 	struct sys_ioctl_args bia;
893 	u_long com;
894 	int error, error1;
895 #if (NWSDISPLAY > 0)
896 	struct vt_mode lvt;
897 	caddr_t bvtp, sg;
898 	struct kbentry kbe;
899 #endif
900 	struct linux_hd_geometry hdg;
901 	struct linux_hd_big_geometry hdg_big;
902 	struct biosdisk_info *bip;
903 	struct filedesc *fdp;
904 	struct file *fp;
905 	int fd;
906 	struct disklabel label, *labp;
907 	struct partinfo partp;
908 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
909 	u_long start, biostotal, realtotal;
910 	u_char heads, sectors;
911 	u_int cylinders;
912 	struct ioctl_pt pt;
913 	struct proc *p = l->l_proc;
914 
915 	fd = SCARG(uap, fd);
916 	SCARG(&bia, fd) = fd;
917 	SCARG(&bia, data) = SCARG(uap, data);
918 	com = SCARG(uap, com);
919 
920 	fdp = p->p_fd;
921 
922 	if ((fp = fd_getfile(fdp, fd)) == NULL)
923 		return (EBADF);
924 
925 	FILE_USE(fp);
926 
927 	switch (com) {
928 #if (NWSDISPLAY > 0)
929 	case LINUX_KDGKBMODE:
930 		com = KDGKBMODE;
931 		break;
932 	case LINUX_KDSKBMODE:
933 		com = KDSKBMODE;
934 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
935 			SCARG(&bia, data) = (caddr_t)K_RAW;
936 		break;
937 	case LINUX_KIOCSOUND:
938 		SCARG(&bia, data) =
939 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
940 		/* fall through */
941 	case LINUX_KDMKTONE:
942 		com = KDMKTONE;
943 		break;
944 	case LINUX_KDSETMODE:
945 		com = KDSETMODE;
946 		break;
947 	case LINUX_KDGETMODE:
948 		/* KD_* values are equal to the wscons numbers */
949 		com = WSDISPLAYIO_GMODE;
950 		break;
951 	case LINUX_KDENABIO:
952 		com = KDENABIO;
953 		break;
954 	case LINUX_KDDISABIO:
955 		com = KDDISABIO;
956 		break;
957 	case LINUX_KDGETLED:
958 		com = KDGETLED;
959 		break;
960 	case LINUX_KDSETLED:
961 		com = KDSETLED;
962 		break;
963 	case LINUX_VT_OPENQRY:
964 		com = VT_OPENQRY;
965 		break;
966 	case LINUX_VT_GETMODE:
967 		SCARG(&bia, com) = VT_GETMODE;
968 		/* XXX NJWLWP */
969 		if ((error = sys_ioctl(curlwp, &bia, retval)))
970 			goto out;
971 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
972 		    sizeof (struct vt_mode))))
973 			goto out;
974 		lvt.relsig = native_to_linux_signo[lvt.relsig];
975 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
976 		lvt.frsig = native_to_linux_signo[lvt.frsig];
977 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
978 		    sizeof (struct vt_mode));
979 		goto out;
980 	case LINUX_VT_SETMODE:
981 		com = VT_SETMODE;
982 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
983 		    sizeof (struct vt_mode))))
984 			goto out;
985 		lvt.relsig = linux_to_native_signo[lvt.relsig];
986 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
987 		lvt.frsig = linux_to_native_signo[lvt.frsig];
988 		sg = stackgap_init(p, 0);
989 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
990 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
991 			goto out;
992 		SCARG(&bia, data) = bvtp;
993 		break;
994 	case LINUX_VT_DISALLOCATE:
995 		/* XXX should use WSDISPLAYIO_DELSCREEN */
996 		error = 0;
997 		goto out;
998 	case LINUX_VT_RELDISP:
999 		com = VT_RELDISP;
1000 		break;
1001 	case LINUX_VT_ACTIVATE:
1002 		com = VT_ACTIVATE;
1003 		break;
1004 	case LINUX_VT_WAITACTIVE:
1005 		com = VT_WAITACTIVE;
1006 		break;
1007 	case LINUX_VT_GETSTATE:
1008 		com = VT_GETSTATE;
1009 		break;
1010 	case LINUX_KDGKBTYPE:
1011 	    {
1012 		static const u_int8_t kb101 = KB_101;
1013 
1014 		/* This is what Linux does. */
1015 		error = copyout(&kb101, SCARG(uap, data), 1);
1016 		goto out;
1017 	    }
1018 	case LINUX_KDGKBENT:
1019 		/*
1020 		 * The Linux KDGKBENT ioctl is different from the
1021 		 * SYSV original. So we handle it in machdep code.
1022 		 * XXX We should use keyboard mapping information
1023 		 * from wsdisplay, but this would be expensive.
1024 		 */
1025 		if ((error = copyin(SCARG(uap, data), &kbe,
1026 				    sizeof(struct kbentry))))
1027 			goto out;
1028 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
1029 		    || kbe.kb_index >= NR_KEYS) {
1030 			error = EINVAL;
1031 			goto out;
1032 		}
1033 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
1034 		error = copyout(&kbe, SCARG(uap, data),
1035 				sizeof(struct kbentry));
1036 		goto out;
1037 #endif
1038 	case LINUX_HDIO_GETGEO:
1039 	case LINUX_HDIO_GETGEO_BIG:
1040 		/*
1041 		 * Try to mimic Linux behaviour: return the BIOS geometry
1042 		 * if possible (extending its # of cylinders if it's beyond
1043 		 * the 1023 limit), fall back to the MI geometry (i.e.
1044 		 * the real geometry) if not found, by returning an
1045 		 * error. See common/linux_hdio.c
1046 		 */
1047 		bip = fd2biosinfo(p, fp);
1048 		ioctlf = fp->f_ops->fo_ioctl;
1049 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
1050 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
1051 		if (error != 0 && error1 != 0) {
1052 			error = error1;
1053 			goto out;
1054 		}
1055 		labp = error != 0 ? &label : partp.disklab;
1056 		start = error1 != 0 ? partp.part->p_offset : 0;
1057 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1058 		    && bip->bi_cyl != 0) {
1059 			heads = bip->bi_head;
1060 			sectors = bip->bi_sec;
1061 			cylinders = bip->bi_cyl;
1062 			biostotal = heads * sectors * cylinders;
1063 			realtotal = labp->d_ntracks * labp->d_nsectors *
1064 			    labp->d_ncylinders;
1065 			if (realtotal > biostotal)
1066 				cylinders = realtotal / (heads * sectors);
1067 		} else {
1068 			heads = labp->d_ntracks;
1069 			cylinders = labp->d_ncylinders;
1070 			sectors = labp->d_nsectors;
1071 		}
1072 		if (com == LINUX_HDIO_GETGEO) {
1073 			hdg.start = start;
1074 			hdg.heads = heads;
1075 			hdg.cylinders = cylinders;
1076 			hdg.sectors = sectors;
1077 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1078 			goto out;
1079 		} else {
1080 			hdg_big.start = start;
1081 			hdg_big.heads = heads;
1082 			hdg_big.cylinders = cylinders;
1083 			hdg_big.sectors = sectors;
1084 			error = copyout(&hdg_big, SCARG(uap, data),
1085 			    sizeof hdg_big);
1086 			goto out;
1087 		}
1088 
1089 	default:
1090 		/*
1091 		 * Unknown to us. If it's on a device, just pass it through
1092 		 * using PTIOCLINUX, the device itself might be able to
1093 		 * make some sense of it.
1094 		 * XXX hack: if the function returns EJUSTRETURN,
1095 		 * it has stuffed a sysctl return value in pt.data.
1096 		 */
1097 		ioctlf = fp->f_ops->fo_ioctl;
1098 		pt.com = SCARG(uap, com);
1099 		pt.data = SCARG(uap, data);
1100 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
1101 		if (error == EJUSTRETURN) {
1102 			retval[0] = (register_t)pt.data;
1103 			error = 0;
1104 		}
1105 
1106 		if (error == ENOTTY) {
1107 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1108 			    com));
1109 		}
1110 		goto out;
1111 	}
1112 	SCARG(&bia, com) = com;
1113 	/* XXX NJWLWP */
1114 	error = sys_ioctl(curlwp, &bia, retval);
1115 out:
1116 	FILE_UNUSE(fp ,l);
1117 	return error;
1118 }
1119 
1120 /*
1121  * Set I/O permissions for a process. Just set the maximum level
1122  * right away (ignoring the argument), otherwise we would have
1123  * to rely on I/O permission maps, which are not implemented.
1124  */
1125 int
1126 linux_sys_iopl(struct lwp *l, void *v __unused, register_t *retval)
1127 {
1128 #if 0
1129 	struct linux_sys_iopl_args /* {
1130 		syscallarg(int) level;
1131 	} */ *uap = v;
1132 #endif
1133 	struct trapframe *fp = l->l_md.md_regs;
1134 
1135 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
1136 	    &l->l_acflag) != 0)
1137 		return EPERM;
1138 	fp->tf_eflags |= PSL_IOPL;
1139 	*retval = 0;
1140 	return 0;
1141 }
1142 
1143 /*
1144  * See above. If a root process tries to set access to an I/O port,
1145  * just let it have the whole range.
1146  */
1147 int
1148 linux_sys_ioperm(l, v, retval)
1149 	struct lwp *l;
1150 	void *v;
1151 	register_t *retval;
1152 {
1153 	struct linux_sys_ioperm_args /* {
1154 		syscallarg(unsigned int) lo;
1155 		syscallarg(unsigned int) hi;
1156 		syscallarg(int) val;
1157 	} */ *uap = v;
1158 	struct trapframe *fp = l->l_md.md_regs;
1159 
1160 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
1161 	    &l->l_acflag) != 0)
1162 		return EPERM;
1163 	if (SCARG(uap, val))
1164 		fp->tf_eflags |= PSL_IOPL;
1165 	*retval = 0;
1166 	return 0;
1167 }
1168 
1169 int
1170 linux_usertrap(struct lwp *l __unused, vaddr_t trapaddr __unused,
1171     void *arg __unused)
1172 {
1173 	return 0;
1174 }
1175