xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 9c1da17e908379b8a470f1117a6395bd6a0ca559)
1 /*	$NetBSD: linux_machdep.c,v 1.111 2005/08/21 13:13:50 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.111 2005/08/21 13:13:50 yamt Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/sa.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <sys/wait.h>
72 #include <miscfs/specfs/specdev.h>
73 
74 #include <compat/linux/common/linux_types.h>
75 #include <compat/linux/common/linux_signal.h>
76 #include <compat/linux/common/linux_util.h>
77 #include <compat/linux/common/linux_ioctl.h>
78 #include <compat/linux/common/linux_hdio.h>
79 #include <compat/linux/common/linux_exec.h>
80 #include <compat/linux/common/linux_machdep.h>
81 #include <compat/linux/common/linux_errno.h>
82 
83 #include <compat/linux/linux_syscallargs.h>
84 
85 #include <machine/cpu.h>
86 #include <machine/cpufunc.h>
87 #include <machine/psl.h>
88 #include <machine/reg.h>
89 #include <machine/segments.h>
90 #include <machine/specialreg.h>
91 #include <machine/sysarch.h>
92 #include <machine/vm86.h>
93 #include <machine/vmparam.h>
94 
95 /*
96  * To see whether wscons is configured (for virtual console ioctl calls).
97  */
98 #if defined(_KERNEL_OPT)
99 #include "wsdisplay.h"
100 #endif
101 #if (NWSDISPLAY > 0)
102 #include <dev/wscons/wsconsio.h>
103 #include <dev/wscons/wsdisplay_usl_io.h>
104 #if defined(_KERNEL_OPT)
105 #include "opt_xserver.h"
106 #endif
107 #endif
108 
109 #ifdef USER_LDT
110 #include <machine/cpu.h>
111 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
112     register_t *));
113 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
114     register_t *));
115 #endif
116 
/*
 * Debug tracing.  DPRINTF takes a single parenthesized argument list,
 * e.g. DPRINTF(("fmt %d\n", x)).  With DEBUG_LINUX defined that list is
 * handed to uprintf; otherwise the invocation expands to nothing.
 */
117 #ifdef DEBUG_LINUX
118 #define DPRINTF(a) uprintf a
119 #else
120 #define DPRINTF(a)
121 #endif
122 
/*
 * Forward declarations for the file-local helpers defined further down.
 * x86_alldisks is declared extern here and is walked by fd2biosinfo().
 */
123 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
124 extern struct disklist *x86_alldisks;
125 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
126     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
127 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
128     const sigset_t *, struct linux_sigcontext *));
129 static int linux_restore_sigcontext __P((struct lwp *,
130     struct linux_sigcontext *, register_t *));
131 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
132 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
133 
/*
 * Signal trampoline symbols.  linux_rt_sendsig() uses the distance between
 * the two (linux_rt_sigcode - linux_sigcode) as the offset of the RT
 * trampoline from the process' ps_sigcode address.
 */
134 extern char linux_sigcode[], linux_rt_sigcode[];
135 /*
136  * Deal with some i386-specific things in the Linux emulation code.
137  */
138 
139 void
140 linux_setregs(l, epp, stack)
141 	struct lwp *l;
142 	struct exec_package *epp;
143 	u_long stack;
144 {
145 	struct pcb *pcb = &l->l_addr->u_pcb;
146 	struct trapframe *tf;
147 
148 #if NNPX > 0
149 	/* If we were using the FPU, forget about it. */
150 	if (npxproc == l)
151 		npxdrop();
152 #endif
153 
154 #ifdef USER_LDT
155 	pmap_ldt_cleanup(l);
156 #endif
157 
158 	l->l_md.md_flags &= ~MDL_USEDFPU;
159 
160 	if (i386_use_fxsave) {
161 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
162 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
163 	} else
164 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
165 
166 	tf = l->l_md.md_regs;
167 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
168 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
169 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
170 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
171 	tf->tf_edi = 0;
172 	tf->tf_esi = 0;
173 	tf->tf_ebp = 0;
174 	tf->tf_ebx = (int)l->l_proc->p_psstr;
175 	tf->tf_edx = 0;
176 	tf->tf_ecx = 0;
177 	tf->tf_eax = 0;
178 	tf->tf_eip = epp->ep_entry;
179 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
180 	tf->tf_eflags = PSL_USERSET;
181 	tf->tf_esp = stack;
182 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
183 }
184 
185 /*
186  * Send an interrupt to process.
187  *
188  * Stack is set up to allow sigcode stored
189  * in u. to call routine, followed by kcall
190  * to sigreturn routine below.  After sigreturn
191  * resets the signal mask, the stack, and the
192  * frame pointer, it returns to the user
193  * specified pc, psl.
194  */
195 
196 void
197 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
198 {
199 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
200 		linux_rt_sendsig(ksi, mask);
201 	else
202 		linux_old_sendsig(ksi, mask);
203 }
204 
205 
206 static void
207 linux_save_ucontext(l, tf, mask, sas, uc)
208 	struct lwp *l;
209 	struct trapframe *tf;
210 	const sigset_t *mask;
211 	struct sigaltstack *sas;
212 	struct linux_ucontext *uc;
213 {
214 	uc->uc_flags = 0;
215 	uc->uc_link = NULL;
216 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
217 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
218 	native_to_linux_sigset(&uc->uc_sigmask, mask);
219 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
220 }
221 
222 static void
223 linux_save_sigcontext(l, tf, mask, sc)
224 	struct lwp *l;
225 	struct trapframe *tf;
226 	const sigset_t *mask;
227 	struct linux_sigcontext *sc;
228 {
229 	/* Save register context. */
230 #ifdef VM86
231 	if (tf->tf_eflags & PSL_VM) {
232 		sc->sc_gs = tf->tf_vm86_gs;
233 		sc->sc_fs = tf->tf_vm86_fs;
234 		sc->sc_es = tf->tf_vm86_es;
235 		sc->sc_ds = tf->tf_vm86_ds;
236 		sc->sc_eflags = get_vflags(l);
237 	} else
238 #endif
239 	{
240 		sc->sc_gs = tf->tf_gs;
241 		sc->sc_fs = tf->tf_fs;
242 		sc->sc_es = tf->tf_es;
243 		sc->sc_ds = tf->tf_ds;
244 		sc->sc_eflags = tf->tf_eflags;
245 	}
246 	sc->sc_edi = tf->tf_edi;
247 	sc->sc_esi = tf->tf_esi;
248 	sc->sc_esp = tf->tf_esp;
249 	sc->sc_ebp = tf->tf_ebp;
250 	sc->sc_ebx = tf->tf_ebx;
251 	sc->sc_edx = tf->tf_edx;
252 	sc->sc_ecx = tf->tf_ecx;
253 	sc->sc_eax = tf->tf_eax;
254 	sc->sc_eip = tf->tf_eip;
255 	sc->sc_cs = tf->tf_cs;
256 	sc->sc_esp_at_signal = tf->tf_esp;
257 	sc->sc_ss = tf->tf_ss;
258 	sc->sc_err = tf->tf_err;
259 	sc->sc_trapno = tf->tf_trapno;
260 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
261 	sc->sc_387 = NULL;
262 
263 	/* Save signal stack. */
264 	/* Linux doesn't save the onstack flag in sigframe */
265 
266 	/* Save signal mask. */
267 	native_to_linux_old_sigset(&sc->sc_mask, mask);
268 }
269 
270 static void
271 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
272 {
273 	struct lwp *l = curlwp;
274 	struct proc *p = l->l_proc;
275 	struct trapframe *tf;
276 	struct linux_rt_sigframe *fp, frame;
277 	int onstack;
278 	linux_siginfo_t *lsi;
279 	int sig = ksi->ksi_signo;
280 	sig_t catcher = SIGACTION(p, sig).sa_handler;
281 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
282 
283 	tf = l->l_md.md_regs;
284 	/* Do we need to jump onto the signal stack? */
285 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
286 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
287 
288 
289 	/* Allocate space for the signal handler context. */
290 	if (onstack)
291 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
292 		    sas->ss_size);
293 	else
294 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
295 	fp--;
296 
297 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
298 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
299 
300 	/* Build stack frame for signal trampoline. */
301 	frame.sf_handler = catcher;
302 	frame.sf_sig = native_to_linux_signo[sig];
303 	frame.sf_sip = &fp->sf_si;
304 	frame.sf_ucp = &fp->sf_uc;
305 
306 	/*
307 	 * XXX: the following code assumes that the constants for
308 	 * siginfo are the same between linux and NetBSD.
309 	 */
310 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
311 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
312 	lsi->lsi_code = ksi->ksi_code;
313 	switch (lsi->lsi_signo = frame.sf_sig) {
314 	case LINUX_SIGILL:
315 	case LINUX_SIGFPE:
316 	case LINUX_SIGSEGV:
317 	case LINUX_SIGBUS:
318 	case LINUX_SIGTRAP:
319 		lsi->lsi_addr = ksi->ksi_addr;
320 		break;
321 	case LINUX_SIGCHLD:
322 		lsi->lsi_uid = ksi->ksi_uid;
323 		lsi->lsi_pid = ksi->ksi_pid;
324 		lsi->lsi_utime = ksi->ksi_utime;
325 		lsi->lsi_stime = ksi->ksi_stime;
326 
327 		/* We use the same codes */
328 		lsi->lsi_code = ksi->ksi_code;
329 		/* XXX is that right? */
330 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
331 		break;
332 	case LINUX_SIGIO:
333 		lsi->lsi_band = ksi->ksi_band;
334 		lsi->lsi_fd = ksi->ksi_fd;
335 		break;
336 	default:
337 		lsi->lsi_uid = ksi->ksi_uid;
338 		lsi->lsi_pid = ksi->ksi_pid;
339 		if (lsi->lsi_signo == LINUX_SIGALRM ||
340 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
341 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
342 		break;
343 	}
344 
345 	/* Save register context. */
346 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
347 
348 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
349 		/*
350 		 * Process has trashed its stack; give it an illegal
351 		 * instruction to halt it in its tracks.
352 		 */
353 		sigexit(l, SIGILL);
354 		/* NOTREACHED */
355 	}
356 
357 	/*
358 	 * Build context to run handler in.
359 	 */
360 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
361 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
362 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
363 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
364 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
365 	    (linux_rt_sigcode - linux_sigcode);
366 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
367 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
368 	tf->tf_esp = (int)fp;
369 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
370 
371 	/* Remember that we're now on the signal stack. */
372 	if (onstack)
373 		sas->ss_flags |= SS_ONSTACK;
374 }
375 
376 static void
377 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
378 {
379 	struct lwp *l = curlwp;
380 	struct proc *p = l->l_proc;
381 	struct trapframe *tf;
382 	struct linux_sigframe *fp, frame;
383 	int onstack;
384 	int sig = ksi->ksi_signo;
385 	sig_t catcher = SIGACTION(p, sig).sa_handler;
386 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
387 
388 	tf = l->l_md.md_regs;
389 
390 	/* Do we need to jump onto the signal stack? */
391 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
392 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
393 
394 	/* Allocate space for the signal handler context. */
395 	if (onstack)
396 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
397 		    sas->ss_size);
398 	else
399 		fp = (struct linux_sigframe *)tf->tf_esp;
400 	fp--;
401 
402 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
403 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
404 
405 	/* Build stack frame for signal trampoline. */
406 	frame.sf_handler = catcher;
407 	frame.sf_sig = native_to_linux_signo[sig];
408 
409 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
410 
411 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
412 		/*
413 		 * Process has trashed its stack; give it an illegal
414 		 * instruction to halt it in its tracks.
415 		 */
416 		sigexit(l, SIGILL);
417 		/* NOTREACHED */
418 	}
419 
420 	/*
421 	 * Build context to run handler in.
422 	 */
423 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
424 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
425 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
426 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
427 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
428 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
429 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
430 	tf->tf_esp = (int)fp;
431 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
432 
433 	/* Remember that we're now on the signal stack. */
434 	if (onstack)
435 		sas->ss_flags |= SS_ONSTACK;
436 }
437 
438 /*
439  * System call to cleanup state after a signal
440  * has been taken.  Reset signal mask and
441  * stack state from context left by sendsig (above).
442  * Return to previous pc and psl as specified by
443  * context left by sendsig. Check carefully to
444  * make sure that the user has not modified the
445  * psl to gain improper privileges or to cause
446  * a machine fault.
447  */
448 int
449 linux_sys_rt_sigreturn(l, v, retval)
450 	struct lwp *l;
451 	void *v;
452 	register_t *retval;
453 {
454 	struct linux_sys_rt_sigreturn_args /* {
455 		syscallarg(struct linux_ucontext *) ucp;
456 	} */ *uap = v;
457 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
458 	int error;
459 
460 	/*
461 	 * The trampoline code hands us the context.
462 	 * It is unsafe to keep track of it ourselves, in the event that a
463 	 * program jumps out of a signal handler.
464 	 */
465 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
466 		return error;
467 
468 	/* XXX XAX we can do better here by using more of the ucontext */
469 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
470 }
471 
472 int
473 linux_sys_sigreturn(l, v, retval)
474 	struct lwp *l;
475 	void *v;
476 	register_t *retval;
477 {
478 	struct linux_sys_sigreturn_args /* {
479 		syscallarg(struct linux_sigcontext *) scp;
480 	} */ *uap = v;
481 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
482 	int error;
483 
484 	/*
485 	 * The trampoline code hands us the context.
486 	 * It is unsafe to keep track of it ourselves, in the event that a
487 	 * program jumps out of a signal handler.
488 	 */
489 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
490 		return error;
491 	return linux_restore_sigcontext(l, &context, retval);
492 }
493 
494 static int
495 linux_restore_sigcontext(l, scp, retval)
496 	struct lwp *l;
497 	struct linux_sigcontext *scp;
498 	register_t *retval;
499 {
500 	struct proc *p = l->l_proc;
501 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
502 	struct trapframe *tf;
503 	sigset_t mask;
504 	ssize_t ss_gap;
505 	/* Restore register context. */
506 	tf = l->l_md.md_regs;
507 
508 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
509 #ifdef VM86
510 	if (scp->sc_eflags & PSL_VM) {
511 		void syscall_vm86 __P((struct trapframe *));
512 
513 		tf->tf_vm86_gs = scp->sc_gs;
514 		tf->tf_vm86_fs = scp->sc_fs;
515 		tf->tf_vm86_es = scp->sc_es;
516 		tf->tf_vm86_ds = scp->sc_ds;
517 		set_vflags(l, scp->sc_eflags);
518 		p->p_md.md_syscall = syscall_vm86;
519 	} else
520 #endif
521 	{
522 		/*
523 		 * Check for security violations.  If we're returning to
524 		 * protected mode, the CPU will validate the segment registers
525 		 * automatically and generate a trap on violations.  We handle
526 		 * the trap, rather than doing all of the checking here.
527 		 */
528 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
529 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
530 			return EINVAL;
531 
532 		tf->tf_gs = scp->sc_gs;
533 		tf->tf_fs = scp->sc_fs;
534 		tf->tf_es = scp->sc_es;
535 		tf->tf_ds = scp->sc_ds;
536 #ifdef VM86
537 		if (tf->tf_eflags & PSL_VM)
538 			(*p->p_emul->e_syscall_intern)(p);
539 #endif
540 		tf->tf_eflags = scp->sc_eflags;
541 	}
542 	tf->tf_edi = scp->sc_edi;
543 	tf->tf_esi = scp->sc_esi;
544 	tf->tf_ebp = scp->sc_ebp;
545 	tf->tf_ebx = scp->sc_ebx;
546 	tf->tf_edx = scp->sc_edx;
547 	tf->tf_ecx = scp->sc_ecx;
548 	tf->tf_eax = scp->sc_eax;
549 	tf->tf_eip = scp->sc_eip;
550 	tf->tf_cs = scp->sc_cs;
551 	tf->tf_esp = scp->sc_esp_at_signal;
552 	tf->tf_ss = scp->sc_ss;
553 
554 	/* Restore signal stack. */
555 	/*
556 	 * Linux really does it this way; it doesn't have space in sigframe
557 	 * to save the onstack flag.
558 	 */
559 	ss_gap = (ssize_t)
560 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
561 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
562 		sas->ss_flags |= SS_ONSTACK;
563 	else
564 		sas->ss_flags &= ~SS_ONSTACK;
565 
566 	/* Restore signal mask. */
567 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
568 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
569 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
570 	return EJUSTRETURN;
571 }
572 
573 #ifdef USER_LDT
574 
575 int
576 linux_read_ldt(l, uap, retval)
577 	struct lwp *l;
578 	struct linux_sys_modify_ldt_args /* {
579 		syscallarg(int) func;
580 		syscallarg(void *) ptr;
581 		syscallarg(size_t) bytecount;
582 	} */ *uap;
583 	register_t *retval;
584 {
585 	struct proc *p = l->l_proc;
586 	struct i386_get_ldt_args gl;
587 	int error;
588 	caddr_t sg;
589 	char *parms;
590 
591 	DPRINTF(("linux_read_ldt!"));
592 	sg = stackgap_init(p, 0);
593 
594 	gl.start = 0;
595 	gl.desc = SCARG(uap, ptr);
596 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
597 
598 	parms = stackgap_alloc(p, &sg, sizeof(gl));
599 
600 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
601 		return (error);
602 
603 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
604 		return (error);
605 
606 	*retval *= sizeof(union descriptor);
607 	return (0);
608 }
609 
610 struct linux_ldt_info {
611 	u_int entry_number;
612 	u_long base_addr;
613 	u_int limit;
614 	u_int seg_32bit:1;
615 	u_int contents:2;
616 	u_int read_exec_only:1;
617 	u_int limit_in_pages:1;
618 	u_int seg_not_present:1;
619 	u_int useable:1;
620 };
621 
622 int
623 linux_write_ldt(l, uap, retval)
624 	struct lwp *l;
625 	struct linux_sys_modify_ldt_args /* {
626 		syscallarg(int) func;
627 		syscallarg(void *) ptr;
628 		syscallarg(size_t) bytecount;
629 	} */ *uap;
630 	register_t *retval;
631 {
632 	struct proc *p = l->l_proc;
633 	struct linux_ldt_info ldt_info;
634 	struct segment_descriptor sd;
635 	struct i386_set_ldt_args sl;
636 	int error;
637 	caddr_t sg;
638 	char *parms;
639 	int oldmode = (int)retval[0];
640 
641 	DPRINTF(("linux_write_ldt %d\n", oldmode));
642 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
643 		return (EINVAL);
644 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
645 		return error;
646 	if (ldt_info.entry_number >= 8192)
647 		return (EINVAL);
648 	if (ldt_info.contents == 3) {
649 		if (oldmode)
650 			return (EINVAL);
651 		if (ldt_info.seg_not_present)
652 			return (EINVAL);
653 	}
654 
655 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
656 	    (oldmode || (ldt_info.contents == 0 &&
657 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
658 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
659 	    ldt_info.useable == 0))) {
660 		/* this means you should zero the ldt */
661 		(void)memset(&sd, 0, sizeof(sd));
662 	} else {
663 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
664 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
665 		sd.sd_lolimit = ldt_info.limit & 0xffff;
666 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
667 		sd.sd_type = 16 | (ldt_info.contents << 2) |
668 		    (!ldt_info.read_exec_only << 1);
669 		sd.sd_dpl = SEL_UPL;
670 		sd.sd_p = !ldt_info.seg_not_present;
671 		sd.sd_def32 = ldt_info.seg_32bit;
672 		sd.sd_gran = ldt_info.limit_in_pages;
673 		if (!oldmode)
674 			sd.sd_xx = ldt_info.useable;
675 		else
676 			sd.sd_xx = 0;
677 	}
678 	sg = stackgap_init(p, 0);
679 	sl.start = ldt_info.entry_number;
680 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
681 	sl.num = 1;
682 
683 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
684 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
685 
686 	parms = stackgap_alloc(p, &sg, sizeof(sl));
687 
688 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
689 		return (error);
690 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
691 		return (error);
692 
693 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
694 		return (error);
695 
696 	*retval = 0;
697 	return (0);
698 }
699 
700 #endif /* USER_LDT */
701 
702 int
703 linux_sys_modify_ldt(l, v, retval)
704 	struct lwp *l;
705 	void *v;
706 	register_t *retval;
707 {
708 	struct linux_sys_modify_ldt_args /* {
709 		syscallarg(int) func;
710 		syscallarg(void *) ptr;
711 		syscallarg(size_t) bytecount;
712 	} */ *uap = v;
713 
714 	switch (SCARG(uap, func)) {
715 #ifdef USER_LDT
716 	case 0:
717 		return linux_read_ldt(l, uap, retval);
718 	case 1:
719 		retval[0] = 1;
720 		return linux_write_ldt(l, uap, retval);
721 	case 2:
722 #ifdef notyet
723 		return (linux_read_default_ldt(l, uap, retval);
724 #else
725 		return (ENOSYS);
726 #endif
727 	case 0x11:
728 		retval[0] = 0;
729 		return linux_write_ldt(l, uap, retval);
730 #endif /* USER_LDT */
731 
732 	default:
733 		return (ENOSYS);
734 	}
735 }
736 
737 /*
738  * XXX Pathetic hack to make svgalib work. This will fake the major
739  * device number of an opened VT so that svgalib likes it. grmbl.
740  * Should probably do it 'wrong the right way' and use a mapping
741  * array for all major device numbers, and map linux_mknod too.
742  */
743 dev_t
744 linux_fakedev(dev, raw)
745 	dev_t dev;
746 	int raw;
747 {
748 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
749 	const struct cdevsw *cd = cdevsw_lookup(dev);
750 
751 	if (raw) {
752 #if (NWSDISPLAY > 0)
753 		extern const struct cdevsw wsdisplay_cdevsw;
754 		if (cd == &wsdisplay_cdevsw)
755 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
756 #endif
757 	}
758 
759 	if (cd == &ptc_cdevsw)
760 		return makedev(LINUX_PTC_MAJOR, minor(dev));
761 	if (cd == &pts_cdevsw)
762 		return makedev(LINUX_PTS_MAJOR, minor(dev));
763 
764 	return dev;
765 }
766 
767 #if (NWSDISPLAY > 0)
768 /*
769  * That's not complete, but enough to get an X server running.
770  */
/*
 * Static keyboard translation tables, only built when wscons is
 * configured.  Each table has NR_KEYS (128) 16-bit entries and is reached
 * through linux_keytabs[] below; the KDGKBENT ioctl handler bounds-checks
 * kb_table against linux_keytabs and kb_index against NR_KEYS.
 * NOTE(review): the entries look like Linux keymap codes (type in the
 * high byte, value in the low byte) -- confirm against the Linux keymap
 * documentation before editing any value.
 */
771 #define NR_KEYS 128
/* Table for keys pressed without a modifier (going by the name). */
772 static const u_short plain_map[NR_KEYS] = {
773 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
774 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
775 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
776 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
777 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
778 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
779 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
780 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
781 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
782 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
783 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
784 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
785 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
786 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
787 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
788 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Table for Shift (going by the name). */
789 }, shift_map[NR_KEYS] = {
790 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
791 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
792 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
793 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
794 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
795 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
796 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
797 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
798 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
799 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
800 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
801 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
802 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
803 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
804 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
805 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Table for AltGr (going by the name). */
806 }, altgr_map[NR_KEYS] = {
807 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
808 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
809 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
810 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
811 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
812 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
813 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
814 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
815 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
816 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
817 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
818 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
819 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
820 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
821 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
822 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Table for Control (going by the name). */
823 }, ctrl_map[NR_KEYS] = {
824 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
825 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
826 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
827 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
828 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
829 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
830 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
831 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
832 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
833 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
834 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
835 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
836 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
837 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
838 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
839 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
840 };
841 
/*
 * Table selector, indexed by the kb_table value of a KDGKBENT request.
 * Slot 3 reuses altgr_map; there is no separate table for it.
 * NOTE(review): the slot order presumably follows the Linux keymap table
 * numbering (plain, shift, altgr, shift+altgr, control) -- confirm.
 */
842 const u_short * const linux_keytabs[] = {
843 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
844 };
845 #endif
846 
847 static struct biosdisk_info *
848 fd2biosinfo(p, fp)
849 	struct proc *p;
850 	struct file *fp;
851 {
852 	struct vnode *vp;
853 	const char *blkname;
854 	char diskname[16];
855 	int i;
856 	struct nativedisk_info *nip;
857 	struct disklist *dl = x86_alldisks;
858 
859 	if (fp->f_type != DTYPE_VNODE)
860 		return NULL;
861 	vp = (struct vnode *)fp->f_data;
862 
863 	if (vp->v_type != VBLK)
864 		return NULL;
865 
866 	blkname = devsw_blk2name(major(vp->v_rdev));
867 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
868 	    DISKUNIT(vp->v_rdev));
869 
870 	for (i = 0; i < dl->dl_nnativedisks; i++) {
871 		nip = &dl->dl_nativedisks[i];
872 		if (strcmp(diskname, nip->ni_devname))
873 			continue;
874 		if (nip->ni_nmatches != 0)
875 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
876 	}
877 
878 	return NULL;
879 }
880 
881 
882 /*
883  * We come here in a last attempt to satisfy a Linux ioctl() call
884  */
885 int
886 linux_machdepioctl(p, v, retval)
887 	struct proc *p;
888 	void *v;
889 	register_t *retval;
890 {
891 	struct linux_sys_ioctl_args /* {
892 		syscallarg(int) fd;
893 		syscallarg(u_long) com;
894 		syscallarg(caddr_t) data;
895 	} */ *uap = v;
896 	struct sys_ioctl_args bia;
897 	u_long com;
898 	int error, error1;
899 #if (NWSDISPLAY > 0)
900 	struct vt_mode lvt;
901 	caddr_t bvtp, sg;
902 	struct kbentry kbe;
903 #endif
904 	struct linux_hd_geometry hdg;
905 	struct linux_hd_big_geometry hdg_big;
906 	struct biosdisk_info *bip;
907 	struct filedesc *fdp;
908 	struct file *fp;
909 	int fd;
910 	struct disklabel label, *labp;
911 	struct partinfo partp;
912 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
913 	u_long start, biostotal, realtotal;
914 	u_char heads, sectors;
915 	u_int cylinders;
916 	struct ioctl_pt pt;
917 
918 	fd = SCARG(uap, fd);
919 	SCARG(&bia, fd) = fd;
920 	SCARG(&bia, data) = SCARG(uap, data);
921 	com = SCARG(uap, com);
922 
923 	fdp = p->p_fd;
924 
925 	if ((fp = fd_getfile(fdp, fd)) == NULL)
926 		return (EBADF);
927 
928 	FILE_USE(fp);
929 
930 	switch (com) {
931 #if (NWSDISPLAY > 0)
932 	case LINUX_KDGKBMODE:
933 		com = KDGKBMODE;
934 		break;
935 	case LINUX_KDSKBMODE:
936 		com = KDSKBMODE;
937 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
938 			SCARG(&bia, data) = (caddr_t)K_RAW;
939 		break;
940 	case LINUX_KIOCSOUND:
941 		SCARG(&bia, data) =
942 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
943 		/* fall through */
944 	case LINUX_KDMKTONE:
945 		com = KDMKTONE;
946 		break;
947 	case LINUX_KDSETMODE:
948 		com = KDSETMODE;
949 		break;
950 	case LINUX_KDGETMODE:
951 		/* KD_* values are equal to the wscons numbers */
952 		com = WSDISPLAYIO_GMODE;
953 		break;
954 	case LINUX_KDENABIO:
955 		com = KDENABIO;
956 		break;
957 	case LINUX_KDDISABIO:
958 		com = KDDISABIO;
959 		break;
960 	case LINUX_KDGETLED:
961 		com = KDGETLED;
962 		break;
963 	case LINUX_KDSETLED:
964 		com = KDSETLED;
965 		break;
966 	case LINUX_VT_OPENQRY:
967 		com = VT_OPENQRY;
968 		break;
969 	case LINUX_VT_GETMODE:
970 		SCARG(&bia, com) = VT_GETMODE;
971 		/* XXX NJWLWP */
972 		if ((error = sys_ioctl(curlwp, &bia, retval)))
973 			goto out;
974 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
975 		    sizeof (struct vt_mode))))
976 			goto out;
977 		lvt.relsig = native_to_linux_signo[lvt.relsig];
978 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
979 		lvt.frsig = native_to_linux_signo[lvt.frsig];
980 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
981 		    sizeof (struct vt_mode));
982 		goto out;
983 	case LINUX_VT_SETMODE:
984 		com = VT_SETMODE;
985 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
986 		    sizeof (struct vt_mode))))
987 			goto out;
988 		lvt.relsig = linux_to_native_signo[lvt.relsig];
989 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
990 		lvt.frsig = linux_to_native_signo[lvt.frsig];
991 		sg = stackgap_init(p, 0);
992 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
993 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
994 			goto out;
995 		SCARG(&bia, data) = bvtp;
996 		break;
997 	case LINUX_VT_DISALLOCATE:
998 		/* XXX should use WSDISPLAYIO_DELSCREEN */
999 		error = 0;
1000 		goto out;
1001 	case LINUX_VT_RELDISP:
1002 		com = VT_RELDISP;
1003 		break;
1004 	case LINUX_VT_ACTIVATE:
1005 		com = VT_ACTIVATE;
1006 		break;
1007 	case LINUX_VT_WAITACTIVE:
1008 		com = VT_WAITACTIVE;
1009 		break;
1010 	case LINUX_VT_GETSTATE:
1011 		com = VT_GETSTATE;
1012 		break;
1013 	case LINUX_KDGKBTYPE:
1014 	    {
1015 		static const u_int8_t kb101 = KB_101;
1016 
1017 		/* This is what Linux does. */
1018 		error = copyout(&kb101, SCARG(uap, data), 1);
1019 		goto out;
1020 	    }
1021 	case LINUX_KDGKBENT:
1022 		/*
1023 		 * The Linux KDGKBENT ioctl is different from the
1024 		 * SYSV original. So we handle it in machdep code.
1025 		 * XXX We should use keyboard mapping information
1026 		 * from wsdisplay, but this would be expensive.
1027 		 */
1028 		if ((error = copyin(SCARG(uap, data), &kbe,
1029 				    sizeof(struct kbentry))))
1030 			goto out;
1031 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
1032 		    || kbe.kb_index >= NR_KEYS) {
1033 			error = EINVAL;
1034 			goto out;
1035 		}
1036 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
1037 		error = copyout(&kbe, SCARG(uap, data),
1038 				sizeof(struct kbentry));
1039 		goto out;
1040 #endif
1041 	case LINUX_HDIO_GETGEO:
1042 	case LINUX_HDIO_GETGEO_BIG:
1043 		/*
1044 		 * Try to mimic Linux behaviour: return the BIOS geometry
1045 		 * if possible (extending its # of cylinders if it's beyond
1046 		 * the 1023 limit), fall back to the MI geometry (i.e.
1047 		 * the real geometry) if not found, by returning an
1048 		 * error. See common/linux_hdio.c
1049 		 */
1050 		bip = fd2biosinfo(p, fp);
1051 		ioctlf = fp->f_ops->fo_ioctl;
1052 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
1053 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
1054 		if (error != 0 && error1 != 0) {
1055 			error = error1;
1056 			goto out;
1057 		}
1058 		labp = error != 0 ? &label : partp.disklab;
1059 		start = error1 != 0 ? partp.part->p_offset : 0;
1060 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1061 		    && bip->bi_cyl != 0) {
1062 			heads = bip->bi_head;
1063 			sectors = bip->bi_sec;
1064 			cylinders = bip->bi_cyl;
1065 			biostotal = heads * sectors * cylinders;
1066 			realtotal = labp->d_ntracks * labp->d_nsectors *
1067 			    labp->d_ncylinders;
1068 			if (realtotal > biostotal)
1069 				cylinders = realtotal / (heads * sectors);
1070 		} else {
1071 			heads = labp->d_ntracks;
1072 			cylinders = labp->d_ncylinders;
1073 			sectors = labp->d_nsectors;
1074 		}
1075 		if (com == LINUX_HDIO_GETGEO) {
1076 			hdg.start = start;
1077 			hdg.heads = heads;
1078 			hdg.cylinders = cylinders;
1079 			hdg.sectors = sectors;
1080 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1081 			goto out;
1082 		} else {
1083 			hdg_big.start = start;
1084 			hdg_big.heads = heads;
1085 			hdg_big.cylinders = cylinders;
1086 			hdg_big.sectors = sectors;
1087 			error = copyout(&hdg_big, SCARG(uap, data),
1088 			    sizeof hdg_big);
1089 			goto out;
1090 		}
1091 
1092 	default:
1093 		/*
1094 		 * Unknown to us. If it's on a device, just pass it through
1095 		 * using PTIOCLINUX, the device itself might be able to
1096 		 * make some sense of it.
1097 		 * XXX hack: if the function returns EJUSTRETURN,
1098 		 * it has stuffed a sysctl return value in pt.data.
1099 		 */
1100 		ioctlf = fp->f_ops->fo_ioctl;
1101 		pt.com = SCARG(uap, com);
1102 		pt.data = SCARG(uap, data);
1103 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
1104 		if (error == EJUSTRETURN) {
1105 			retval[0] = (register_t)pt.data;
1106 			error = 0;
1107 		}
1108 
1109 		if (error == ENOTTY)
1110 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1111 			    com));
1112 		goto out;
1113 	}
1114 	SCARG(&bia, com) = com;
1115 	/* XXX NJWLWP */
1116 	error = sys_ioctl(curlwp, &bia, retval);
1117 out:
1118 	FILE_UNUSE(fp ,p);
1119 	return error;
1120 }
1121 
1122 /*
1123  * Set I/O permissions for a process. Just set the maximum level
1124  * right away (ignoring the argument), otherwise we would have
1125  * to rely on I/O permission maps, which are not implemented.
1126  */
1127 int
1128 linux_sys_iopl(l, v, retval)
1129 	struct lwp *l;
1130 	void *v;
1131 	register_t *retval;
1132 {
1133 #if 0
1134 	struct linux_sys_iopl_args /* {
1135 		syscallarg(int) level;
1136 	} */ *uap = v;
1137 #endif
1138 	struct proc *p = l->l_proc;
1139 	struct trapframe *fp = l->l_md.md_regs;
1140 
1141 	if (suser(p->p_ucred, &p->p_acflag) != 0)
1142 		return EPERM;
1143 	fp->tf_eflags |= PSL_IOPL;
1144 	*retval = 0;
1145 	return 0;
1146 }
1147 
1148 /*
1149  * See above. If a root process tries to set access to an I/O port,
1150  * just let it have the whole range.
1151  */
1152 int
1153 linux_sys_ioperm(l, v, retval)
1154 	struct lwp *l;
1155 	void *v;
1156 	register_t *retval;
1157 {
1158 	struct linux_sys_ioperm_args /* {
1159 		syscallarg(unsigned int) lo;
1160 		syscallarg(unsigned int) hi;
1161 		syscallarg(int) val;
1162 	} */ *uap = v;
1163 	struct proc *p = l->l_proc;
1164 	struct trapframe *fp = l->l_md.md_regs;
1165 
1166 	if (suser(p->p_ucred, &p->p_acflag) != 0)
1167 		return EPERM;
1168 	if (SCARG(uap, val))
1169 		fp->tf_eflags |= PSL_IOPL;
1170 	*retval = 0;
1171 	return 0;
1172 }
1173 
1174 int
1175 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg)
1176 {
1177 	return 0;
1178 }
1179