xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision b7ae68fde0d8ef1c03714e8bbb1ee7c6118ea93b)
1 /*	$NetBSD: linux_machdep.c,v 1.115 2006/09/13 00:49:07 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.115 2006/09/13 00:49:07 christos Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/sa.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <sys/wait.h>
72 #include <sys/kauth.h>
73 
74 #include <miscfs/specfs/specdev.h>
75 
76 #include <compat/linux/common/linux_types.h>
77 #include <compat/linux/common/linux_signal.h>
78 #include <compat/linux/common/linux_util.h>
79 #include <compat/linux/common/linux_ioctl.h>
80 #include <compat/linux/common/linux_hdio.h>
81 #include <compat/linux/common/linux_exec.h>
82 #include <compat/linux/common/linux_machdep.h>
83 #include <compat/linux/common/linux_errno.h>
84 
85 #include <compat/linux/linux_syscallargs.h>
86 
87 #include <machine/cpu.h>
88 #include <machine/cpufunc.h>
89 #include <machine/psl.h>
90 #include <machine/reg.h>
91 #include <machine/segments.h>
92 #include <machine/specialreg.h>
93 #include <machine/sysarch.h>
94 #include <machine/vm86.h>
95 #include <machine/vmparam.h>
96 
97 /*
98  * To see whether wscons is configured (for virtual console ioctl calls).
99  */
100 #if defined(_KERNEL_OPT)
101 #include "wsdisplay.h"
102 #endif
103 #if (NWSDISPLAY > 0)
104 #include <dev/wscons/wsconsio.h>
105 #include <dev/wscons/wsdisplay_usl_io.h>
106 #if defined(_KERNEL_OPT)
107 #include "opt_xserver.h"
108 #endif
109 #endif
110 
111 #ifdef USER_LDT
112 #include <machine/cpu.h>
113 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
114     register_t *));
115 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
116     register_t *));
117 #endif
118 
119 #ifdef DEBUG_LINUX
120 #define DPRINTF(a) uprintf a
121 #else
122 #define DPRINTF(a)
123 #endif
124 
125 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
126 extern struct disklist *x86_alldisks;
127 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
128     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
129 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
130     const sigset_t *, struct linux_sigcontext *));
131 static int linux_restore_sigcontext __P((struct lwp *,
132     struct linux_sigcontext *, register_t *));
133 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
134 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
135 
136 extern char linux_sigcode[], linux_rt_sigcode[];
137 /*
138  * Deal with some i386-specific things in the Linux emulation code.
139  */
140 
141 void
142 linux_setregs(l, epp, stack)
143 	struct lwp *l;
144 	struct exec_package *epp;
145 	u_long stack;
146 {
147 	struct pcb *pcb = &l->l_addr->u_pcb;
148 	struct trapframe *tf;
149 
150 #if NNPX > 0
151 	/* If we were using the FPU, forget about it. */
152 	if (npxproc == l)
153 		npxdrop();
154 #endif
155 
156 #ifdef USER_LDT
157 	pmap_ldt_cleanup(l);
158 #endif
159 
160 	l->l_md.md_flags &= ~MDL_USEDFPU;
161 
162 	if (i386_use_fxsave) {
163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
164 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
165 	} else
166 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
167 
168 	tf = l->l_md.md_regs;
169 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
170 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
171 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
172 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
173 	tf->tf_edi = 0;
174 	tf->tf_esi = 0;
175 	tf->tf_ebp = 0;
176 	tf->tf_ebx = (int)l->l_proc->p_psstr;
177 	tf->tf_edx = 0;
178 	tf->tf_ecx = 0;
179 	tf->tf_eax = 0;
180 	tf->tf_eip = epp->ep_entry;
181 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
182 	tf->tf_eflags = PSL_USERSET;
183 	tf->tf_esp = stack;
184 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
185 }
186 
187 /*
188  * Send an interrupt to process.
189  *
190  * Stack is set up to allow sigcode stored
191  * in u. to call routine, followed by kcall
192  * to sigreturn routine below.  After sigreturn
193  * resets the signal mask, the stack, and the
194  * frame pointer, it returns to the user
195  * specified pc, psl.
196  */
197 
198 void
199 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
200 {
201 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
202 		linux_rt_sendsig(ksi, mask);
203 	else
204 		linux_old_sendsig(ksi, mask);
205 }
206 
207 
208 static void
209 linux_save_ucontext(l, tf, mask, sas, uc)
210 	struct lwp *l;
211 	struct trapframe *tf;
212 	const sigset_t *mask;
213 	struct sigaltstack *sas;
214 	struct linux_ucontext *uc;
215 {
216 	uc->uc_flags = 0;
217 	uc->uc_link = NULL;
218 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
219 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
220 	native_to_linux_sigset(&uc->uc_sigmask, mask);
221 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
222 }
223 
224 static void
225 linux_save_sigcontext(l, tf, mask, sc)
226 	struct lwp *l;
227 	struct trapframe *tf;
228 	const sigset_t *mask;
229 	struct linux_sigcontext *sc;
230 {
231 	/* Save register context. */
232 #ifdef VM86
233 	if (tf->tf_eflags & PSL_VM) {
234 		sc->sc_gs = tf->tf_vm86_gs;
235 		sc->sc_fs = tf->tf_vm86_fs;
236 		sc->sc_es = tf->tf_vm86_es;
237 		sc->sc_ds = tf->tf_vm86_ds;
238 		sc->sc_eflags = get_vflags(l);
239 	} else
240 #endif
241 	{
242 		sc->sc_gs = tf->tf_gs;
243 		sc->sc_fs = tf->tf_fs;
244 		sc->sc_es = tf->tf_es;
245 		sc->sc_ds = tf->tf_ds;
246 		sc->sc_eflags = tf->tf_eflags;
247 	}
248 	sc->sc_edi = tf->tf_edi;
249 	sc->sc_esi = tf->tf_esi;
250 	sc->sc_esp = tf->tf_esp;
251 	sc->sc_ebp = tf->tf_ebp;
252 	sc->sc_ebx = tf->tf_ebx;
253 	sc->sc_edx = tf->tf_edx;
254 	sc->sc_ecx = tf->tf_ecx;
255 	sc->sc_eax = tf->tf_eax;
256 	sc->sc_eip = tf->tf_eip;
257 	sc->sc_cs = tf->tf_cs;
258 	sc->sc_esp_at_signal = tf->tf_esp;
259 	sc->sc_ss = tf->tf_ss;
260 	sc->sc_err = tf->tf_err;
261 	sc->sc_trapno = tf->tf_trapno;
262 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
263 	sc->sc_387 = NULL;
264 
265 	/* Save signal stack. */
266 	/* Linux doesn't save the onstack flag in sigframe */
267 
268 	/* Save signal mask. */
269 	native_to_linux_old_sigset(&sc->sc_mask, mask);
270 }
271 
272 static void
273 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
274 {
275 	struct lwp *l = curlwp;
276 	struct proc *p = l->l_proc;
277 	struct trapframe *tf;
278 	struct linux_rt_sigframe *fp, frame;
279 	int onstack;
280 	linux_siginfo_t *lsi;
281 	int sig = ksi->ksi_signo;
282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
283 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
284 
285 	tf = l->l_md.md_regs;
286 	/* Do we need to jump onto the signal stack? */
287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
289 
290 
291 	/* Allocate space for the signal handler context. */
292 	if (onstack)
293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
294 		    sas->ss_size);
295 	else
296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
297 	fp--;
298 
299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
301 
302 	/* Build stack frame for signal trampoline. */
303 	frame.sf_handler = catcher;
304 	frame.sf_sig = native_to_linux_signo[sig];
305 	frame.sf_sip = &fp->sf_si;
306 	frame.sf_ucp = &fp->sf_uc;
307 
308 	/*
309 	 * XXX: the following code assumes that the constants for
310 	 * siginfo are the same between linux and NetBSD.
311 	 */
312 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
313 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
314 	lsi->lsi_code = ksi->ksi_code;
315 	switch (lsi->lsi_signo = frame.sf_sig) {
316 	case LINUX_SIGILL:
317 	case LINUX_SIGFPE:
318 	case LINUX_SIGSEGV:
319 	case LINUX_SIGBUS:
320 	case LINUX_SIGTRAP:
321 		lsi->lsi_addr = ksi->ksi_addr;
322 		break;
323 	case LINUX_SIGCHLD:
324 		lsi->lsi_uid = ksi->ksi_uid;
325 		lsi->lsi_pid = ksi->ksi_pid;
326 		lsi->lsi_utime = ksi->ksi_utime;
327 		lsi->lsi_stime = ksi->ksi_stime;
328 
329 		/* We use the same codes */
330 		lsi->lsi_code = ksi->ksi_code;
331 		/* XXX is that right? */
332 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
333 		break;
334 	case LINUX_SIGIO:
335 		lsi->lsi_band = ksi->ksi_band;
336 		lsi->lsi_fd = ksi->ksi_fd;
337 		break;
338 	default:
339 		lsi->lsi_uid = ksi->ksi_uid;
340 		lsi->lsi_pid = ksi->ksi_pid;
341 		if (lsi->lsi_signo == LINUX_SIGALRM ||
342 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
343 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
344 		break;
345 	}
346 
347 	/* Save register context. */
348 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
349 
350 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
351 		/*
352 		 * Process has trashed its stack; give it an illegal
353 		 * instruction to halt it in its tracks.
354 		 */
355 		sigexit(l, SIGILL);
356 		/* NOTREACHED */
357 	}
358 
359 	/*
360 	 * Build context to run handler in.
361 	 */
362 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
363 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
364 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
365 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
366 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
367 	    (linux_rt_sigcode - linux_sigcode);
368 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
369 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
370 	tf->tf_esp = (int)fp;
371 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
372 
373 	/* Remember that we're now on the signal stack. */
374 	if (onstack)
375 		sas->ss_flags |= SS_ONSTACK;
376 }
377 
378 static void
379 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
380 {
381 	struct lwp *l = curlwp;
382 	struct proc *p = l->l_proc;
383 	struct trapframe *tf;
384 	struct linux_sigframe *fp, frame;
385 	int onstack;
386 	int sig = ksi->ksi_signo;
387 	sig_t catcher = SIGACTION(p, sig).sa_handler;
388 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
389 
390 	tf = l->l_md.md_regs;
391 
392 	/* Do we need to jump onto the signal stack? */
393 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
394 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
395 
396 	/* Allocate space for the signal handler context. */
397 	if (onstack)
398 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
399 		    sas->ss_size);
400 	else
401 		fp = (struct linux_sigframe *)tf->tf_esp;
402 	fp--;
403 
404 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
405 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
406 
407 	/* Build stack frame for signal trampoline. */
408 	frame.sf_handler = catcher;
409 	frame.sf_sig = native_to_linux_signo[sig];
410 
411 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
412 
413 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
414 		/*
415 		 * Process has trashed its stack; give it an illegal
416 		 * instruction to halt it in its tracks.
417 		 */
418 		sigexit(l, SIGILL);
419 		/* NOTREACHED */
420 	}
421 
422 	/*
423 	 * Build context to run handler in.
424 	 */
425 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
426 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
427 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
428 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
429 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
430 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
431 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
432 	tf->tf_esp = (int)fp;
433 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
434 
435 	/* Remember that we're now on the signal stack. */
436 	if (onstack)
437 		sas->ss_flags |= SS_ONSTACK;
438 }
439 
440 /*
441  * System call to cleanup state after a signal
442  * has been taken.  Reset signal mask and
443  * stack state from context left by sendsig (above).
444  * Return to previous pc and psl as specified by
445  * context left by sendsig. Check carefully to
446  * make sure that the user has not modified the
447  * psl to gain improper privileges or to cause
448  * a machine fault.
449  */
450 int
451 linux_sys_rt_sigreturn(l, v, retval)
452 	struct lwp *l;
453 	void *v;
454 	register_t *retval;
455 {
456 	struct linux_sys_rt_sigreturn_args /* {
457 		syscallarg(struct linux_ucontext *) ucp;
458 	} */ *uap = v;
459 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
460 	int error;
461 
462 	/*
463 	 * The trampoline code hands us the context.
464 	 * It is unsafe to keep track of it ourselves, in the event that a
465 	 * program jumps out of a signal handler.
466 	 */
467 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
468 		return error;
469 
470 	/* XXX XAX we can do better here by using more of the ucontext */
471 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
472 }
473 
474 int
475 linux_sys_sigreturn(l, v, retval)
476 	struct lwp *l;
477 	void *v;
478 	register_t *retval;
479 {
480 	struct linux_sys_sigreturn_args /* {
481 		syscallarg(struct linux_sigcontext *) scp;
482 	} */ *uap = v;
483 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
484 	int error;
485 
486 	/*
487 	 * The trampoline code hands us the context.
488 	 * It is unsafe to keep track of it ourselves, in the event that a
489 	 * program jumps out of a signal handler.
490 	 */
491 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
492 		return error;
493 	return linux_restore_sigcontext(l, &context, retval);
494 }
495 
496 static int
497 linux_restore_sigcontext(l, scp, retval)
498 	struct lwp *l;
499 	struct linux_sigcontext *scp;
500 	register_t *retval;
501 {
502 	struct proc *p = l->l_proc;
503 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
504 	struct trapframe *tf;
505 	sigset_t mask;
506 	ssize_t ss_gap;
507 	/* Restore register context. */
508 	tf = l->l_md.md_regs;
509 
510 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
511 #ifdef VM86
512 	if (scp->sc_eflags & PSL_VM) {
513 		void syscall_vm86 __P((struct trapframe *));
514 
515 		tf->tf_vm86_gs = scp->sc_gs;
516 		tf->tf_vm86_fs = scp->sc_fs;
517 		tf->tf_vm86_es = scp->sc_es;
518 		tf->tf_vm86_ds = scp->sc_ds;
519 		set_vflags(l, scp->sc_eflags);
520 		p->p_md.md_syscall = syscall_vm86;
521 	} else
522 #endif
523 	{
524 		/*
525 		 * Check for security violations.  If we're returning to
526 		 * protected mode, the CPU will validate the segment registers
527 		 * automatically and generate a trap on violations.  We handle
528 		 * the trap, rather than doing all of the checking here.
529 		 */
530 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
531 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
532 			return EINVAL;
533 
534 		tf->tf_gs = scp->sc_gs;
535 		tf->tf_fs = scp->sc_fs;
536 		tf->tf_es = scp->sc_es;
537 		tf->tf_ds = scp->sc_ds;
538 #ifdef VM86
539 		if (tf->tf_eflags & PSL_VM)
540 			(*p->p_emul->e_syscall_intern)(p);
541 #endif
542 		tf->tf_eflags = scp->sc_eflags;
543 	}
544 	tf->tf_edi = scp->sc_edi;
545 	tf->tf_esi = scp->sc_esi;
546 	tf->tf_ebp = scp->sc_ebp;
547 	tf->tf_ebx = scp->sc_ebx;
548 	tf->tf_edx = scp->sc_edx;
549 	tf->tf_ecx = scp->sc_ecx;
550 	tf->tf_eax = scp->sc_eax;
551 	tf->tf_eip = scp->sc_eip;
552 	tf->tf_cs = scp->sc_cs;
553 	tf->tf_esp = scp->sc_esp_at_signal;
554 	tf->tf_ss = scp->sc_ss;
555 
556 	/* Restore signal stack. */
557 	/*
558 	 * Linux really does it this way; it doesn't have space in sigframe
559 	 * to save the onstack flag.
560 	 */
561 	ss_gap = (ssize_t)
562 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
563 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
564 		sas->ss_flags |= SS_ONSTACK;
565 	else
566 		sas->ss_flags &= ~SS_ONSTACK;
567 
568 	/* Restore signal mask. */
569 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
570 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
571 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
572 	return EJUSTRETURN;
573 }
574 
575 #ifdef USER_LDT
576 
577 int
578 linux_read_ldt(l, uap, retval)
579 	struct lwp *l;
580 	struct linux_sys_modify_ldt_args /* {
581 		syscallarg(int) func;
582 		syscallarg(void *) ptr;
583 		syscallarg(size_t) bytecount;
584 	} */ *uap;
585 	register_t *retval;
586 {
587 	struct proc *p = l->l_proc;
588 	struct i386_get_ldt_args gl;
589 	int error;
590 	caddr_t sg;
591 	char *parms;
592 
593 	DPRINTF(("linux_read_ldt!"));
594 	sg = stackgap_init(p, 0);
595 
596 	gl.start = 0;
597 	gl.desc = SCARG(uap, ptr);
598 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
599 
600 	parms = stackgap_alloc(p, &sg, sizeof(gl));
601 
602 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
603 		return (error);
604 
605 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
606 		return (error);
607 
608 	*retval *= sizeof(union descriptor);
609 	return (0);
610 }
611 
612 struct linux_ldt_info {
613 	u_int entry_number;
614 	u_long base_addr;
615 	u_int limit;
616 	u_int seg_32bit:1;
617 	u_int contents:2;
618 	u_int read_exec_only:1;
619 	u_int limit_in_pages:1;
620 	u_int seg_not_present:1;
621 	u_int useable:1;
622 };
623 
624 int
625 linux_write_ldt(l, uap, retval)
626 	struct lwp *l;
627 	struct linux_sys_modify_ldt_args /* {
628 		syscallarg(int) func;
629 		syscallarg(void *) ptr;
630 		syscallarg(size_t) bytecount;
631 	} */ *uap;
632 	register_t *retval;
633 {
634 	struct proc *p = l->l_proc;
635 	struct linux_ldt_info ldt_info;
636 	struct segment_descriptor sd;
637 	struct i386_set_ldt_args sl;
638 	int error;
639 	caddr_t sg;
640 	char *parms;
641 	int oldmode = (int)retval[0];
642 
643 	DPRINTF(("linux_write_ldt %d\n", oldmode));
644 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
645 		return (EINVAL);
646 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
647 		return error;
648 	if (ldt_info.entry_number >= 8192)
649 		return (EINVAL);
650 	if (ldt_info.contents == 3) {
651 		if (oldmode)
652 			return (EINVAL);
653 		if (ldt_info.seg_not_present)
654 			return (EINVAL);
655 	}
656 
657 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
658 	    (oldmode || (ldt_info.contents == 0 &&
659 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
660 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
661 	    ldt_info.useable == 0))) {
662 		/* this means you should zero the ldt */
663 		(void)memset(&sd, 0, sizeof(sd));
664 	} else {
665 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
666 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
667 		sd.sd_lolimit = ldt_info.limit & 0xffff;
668 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
669 		sd.sd_type = 16 | (ldt_info.contents << 2) |
670 		    (!ldt_info.read_exec_only << 1);
671 		sd.sd_dpl = SEL_UPL;
672 		sd.sd_p = !ldt_info.seg_not_present;
673 		sd.sd_def32 = ldt_info.seg_32bit;
674 		sd.sd_gran = ldt_info.limit_in_pages;
675 		if (!oldmode)
676 			sd.sd_xx = ldt_info.useable;
677 		else
678 			sd.sd_xx = 0;
679 	}
680 	sg = stackgap_init(p, 0);
681 	sl.start = ldt_info.entry_number;
682 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
683 	sl.num = 1;
684 
685 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
686 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
687 
688 	parms = stackgap_alloc(p, &sg, sizeof(sl));
689 
690 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
691 		return (error);
692 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
693 		return (error);
694 
695 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
696 		return (error);
697 
698 	*retval = 0;
699 	return (0);
700 }
701 
702 #endif /* USER_LDT */
703 
704 int
705 linux_sys_modify_ldt(l, v, retval)
706 	struct lwp *l;
707 	void *v;
708 	register_t *retval;
709 {
710 	struct linux_sys_modify_ldt_args /* {
711 		syscallarg(int) func;
712 		syscallarg(void *) ptr;
713 		syscallarg(size_t) bytecount;
714 	} */ *uap = v;
715 
716 	switch (SCARG(uap, func)) {
717 #ifdef USER_LDT
718 	case 0:
719 		return linux_read_ldt(l, uap, retval);
720 	case 1:
721 		retval[0] = 1;
722 		return linux_write_ldt(l, uap, retval);
723 	case 2:
724 #ifdef notyet
725 		return (linux_read_default_ldt(l, uap, retval);
726 #else
727 		return (ENOSYS);
728 #endif
729 	case 0x11:
730 		retval[0] = 0;
731 		return linux_write_ldt(l, uap, retval);
732 #endif /* USER_LDT */
733 
734 	default:
735 		return (ENOSYS);
736 	}
737 }
738 
739 /*
740  * XXX Pathetic hack to make svgalib work. This will fake the major
741  * device number of an opened VT so that svgalib likes it. grmbl.
742  * Should probably do it 'wrong the right way' and use a mapping
743  * array for all major device numbers, and map linux_mknod too.
744  */
745 dev_t
746 linux_fakedev(dev, raw)
747 	dev_t dev;
748 	int raw;
749 {
750 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
751 	const struct cdevsw *cd = cdevsw_lookup(dev);
752 
753 	if (raw) {
754 #if (NWSDISPLAY > 0)
755 		extern const struct cdevsw wsdisplay_cdevsw;
756 		if (cd == &wsdisplay_cdevsw)
757 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
758 #endif
759 	}
760 
761 	if (cd == &ptc_cdevsw)
762 		return makedev(LINUX_PTC_MAJOR, minor(dev));
763 	if (cd == &pts_cdevsw)
764 		return makedev(LINUX_PTS_MAJOR, minor(dev));
765 
766 	return dev;
767 }
768 
#if (NWSDISPLAY > 0)
/*
 * Static keyboard translation tables served to Linux binaries, one
 * u_short per key code (NR_KEYS entries per table).
 *
 * That's not complete, but enough to get an X server running.
 */
#define NR_KEYS 128

/* No modifier held. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Shift held. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* AltGr held. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Control held. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table selector used by the KDGKBENT ioctl handler, which range-checks
 * kb_table against the size of this array.  altgr_map is deliberately
 * listed twice.
 * NOTE(review): slot 3 presumably stands in for Shift+AltGr -- confirm
 * against the Linux keymap numbering.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
848 
849 static struct biosdisk_info *
850 fd2biosinfo(p, fp)
851 	struct proc *p;
852 	struct file *fp;
853 {
854 	struct vnode *vp;
855 	const char *blkname;
856 	char diskname[16];
857 	int i;
858 	struct nativedisk_info *nip;
859 	struct disklist *dl = x86_alldisks;
860 
861 	if (fp->f_type != DTYPE_VNODE)
862 		return NULL;
863 	vp = (struct vnode *)fp->f_data;
864 
865 	if (vp->v_type != VBLK)
866 		return NULL;
867 
868 	blkname = devsw_blk2name(major(vp->v_rdev));
869 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
870 	    DISKUNIT(vp->v_rdev));
871 
872 	for (i = 0; i < dl->dl_nnativedisks; i++) {
873 		nip = &dl->dl_nativedisks[i];
874 		if (strcmp(diskname, nip->ni_devname))
875 			continue;
876 		if (nip->ni_nmatches != 0)
877 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
878 	}
879 
880 	return NULL;
881 }
882 
883 
884 /*
885  * We come here in a last attempt to satisfy a Linux ioctl() call
886  */
887 int
888 linux_machdepioctl(l, v, retval)
889 	struct lwp *l;
890 	void *v;
891 	register_t *retval;
892 {
893 	struct linux_sys_ioctl_args /* {
894 		syscallarg(int) fd;
895 		syscallarg(u_long) com;
896 		syscallarg(caddr_t) data;
897 	} */ *uap = v;
898 	struct sys_ioctl_args bia;
899 	u_long com;
900 	int error, error1;
901 #if (NWSDISPLAY > 0)
902 	struct vt_mode lvt;
903 	caddr_t bvtp, sg;
904 	struct kbentry kbe;
905 #endif
906 	struct linux_hd_geometry hdg;
907 	struct linux_hd_big_geometry hdg_big;
908 	struct biosdisk_info *bip;
909 	struct filedesc *fdp;
910 	struct file *fp;
911 	int fd;
912 	struct disklabel label, *labp;
913 	struct partinfo partp;
914 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
915 	u_long start, biostotal, realtotal;
916 	u_char heads, sectors;
917 	u_int cylinders;
918 	struct ioctl_pt pt;
919 	struct proc *p = l->l_proc;
920 
921 	fd = SCARG(uap, fd);
922 	SCARG(&bia, fd) = fd;
923 	SCARG(&bia, data) = SCARG(uap, data);
924 	com = SCARG(uap, com);
925 
926 	fdp = p->p_fd;
927 
928 	if ((fp = fd_getfile(fdp, fd)) == NULL)
929 		return (EBADF);
930 
931 	FILE_USE(fp);
932 
933 	switch (com) {
934 #if (NWSDISPLAY > 0)
935 	case LINUX_KDGKBMODE:
936 		com = KDGKBMODE;
937 		break;
938 	case LINUX_KDSKBMODE:
939 		com = KDSKBMODE;
940 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
941 			SCARG(&bia, data) = (caddr_t)K_RAW;
942 		break;
943 	case LINUX_KIOCSOUND:
944 		SCARG(&bia, data) =
945 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
946 		/* fall through */
947 	case LINUX_KDMKTONE:
948 		com = KDMKTONE;
949 		break;
950 	case LINUX_KDSETMODE:
951 		com = KDSETMODE;
952 		break;
953 	case LINUX_KDGETMODE:
954 		/* KD_* values are equal to the wscons numbers */
955 		com = WSDISPLAYIO_GMODE;
956 		break;
957 	case LINUX_KDENABIO:
958 		com = KDENABIO;
959 		break;
960 	case LINUX_KDDISABIO:
961 		com = KDDISABIO;
962 		break;
963 	case LINUX_KDGETLED:
964 		com = KDGETLED;
965 		break;
966 	case LINUX_KDSETLED:
967 		com = KDSETLED;
968 		break;
969 	case LINUX_VT_OPENQRY:
970 		com = VT_OPENQRY;
971 		break;
972 	case LINUX_VT_GETMODE:
973 		SCARG(&bia, com) = VT_GETMODE;
974 		/* XXX NJWLWP */
975 		if ((error = sys_ioctl(curlwp, &bia, retval)))
976 			goto out;
977 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
978 		    sizeof (struct vt_mode))))
979 			goto out;
980 		lvt.relsig = native_to_linux_signo[lvt.relsig];
981 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
982 		lvt.frsig = native_to_linux_signo[lvt.frsig];
983 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
984 		    sizeof (struct vt_mode));
985 		goto out;
986 	case LINUX_VT_SETMODE:
987 		com = VT_SETMODE;
988 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
989 		    sizeof (struct vt_mode))))
990 			goto out;
991 		lvt.relsig = linux_to_native_signo[lvt.relsig];
992 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
993 		lvt.frsig = linux_to_native_signo[lvt.frsig];
994 		sg = stackgap_init(p, 0);
995 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
996 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
997 			goto out;
998 		SCARG(&bia, data) = bvtp;
999 		break;
1000 	case LINUX_VT_DISALLOCATE:
1001 		/* XXX should use WSDISPLAYIO_DELSCREEN */
1002 		error = 0;
1003 		goto out;
1004 	case LINUX_VT_RELDISP:
1005 		com = VT_RELDISP;
1006 		break;
1007 	case LINUX_VT_ACTIVATE:
1008 		com = VT_ACTIVATE;
1009 		break;
1010 	case LINUX_VT_WAITACTIVE:
1011 		com = VT_WAITACTIVE;
1012 		break;
1013 	case LINUX_VT_GETSTATE:
1014 		com = VT_GETSTATE;
1015 		break;
1016 	case LINUX_KDGKBTYPE:
1017 	    {
1018 		static const u_int8_t kb101 = KB_101;
1019 
1020 		/* This is what Linux does. */
1021 		error = copyout(&kb101, SCARG(uap, data), 1);
1022 		goto out;
1023 	    }
1024 	case LINUX_KDGKBENT:
1025 		/*
1026 		 * The Linux KDGKBENT ioctl is different from the
1027 		 * SYSV original. So we handle it in machdep code.
1028 		 * XXX We should use keyboard mapping information
1029 		 * from wsdisplay, but this would be expensive.
1030 		 */
1031 		if ((error = copyin(SCARG(uap, data), &kbe,
1032 				    sizeof(struct kbentry))))
1033 			goto out;
1034 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
1035 		    || kbe.kb_index >= NR_KEYS) {
1036 			error = EINVAL;
1037 			goto out;
1038 		}
1039 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
1040 		error = copyout(&kbe, SCARG(uap, data),
1041 				sizeof(struct kbentry));
1042 		goto out;
1043 #endif
1044 	case LINUX_HDIO_GETGEO:
1045 	case LINUX_HDIO_GETGEO_BIG:
1046 		/*
1047 		 * Try to mimic Linux behaviour: return the BIOS geometry
1048 		 * if possible (extending its # of cylinders if it's beyond
1049 		 * the 1023 limit), fall back to the MI geometry (i.e.
1050 		 * the real geometry) if not found, by returning an
1051 		 * error. See common/linux_hdio.c
1052 		 */
1053 		bip = fd2biosinfo(p, fp);
1054 		ioctlf = fp->f_ops->fo_ioctl;
1055 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
1056 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
1057 		if (error != 0 && error1 != 0) {
1058 			error = error1;
1059 			goto out;
1060 		}
1061 		labp = error != 0 ? &label : partp.disklab;
1062 		start = error1 != 0 ? partp.part->p_offset : 0;
1063 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1064 		    && bip->bi_cyl != 0) {
1065 			heads = bip->bi_head;
1066 			sectors = bip->bi_sec;
1067 			cylinders = bip->bi_cyl;
1068 			biostotal = heads * sectors * cylinders;
1069 			realtotal = labp->d_ntracks * labp->d_nsectors *
1070 			    labp->d_ncylinders;
1071 			if (realtotal > biostotal)
1072 				cylinders = realtotal / (heads * sectors);
1073 		} else {
1074 			heads = labp->d_ntracks;
1075 			cylinders = labp->d_ncylinders;
1076 			sectors = labp->d_nsectors;
1077 		}
1078 		if (com == LINUX_HDIO_GETGEO) {
1079 			hdg.start = start;
1080 			hdg.heads = heads;
1081 			hdg.cylinders = cylinders;
1082 			hdg.sectors = sectors;
1083 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1084 			goto out;
1085 		} else {
1086 			hdg_big.start = start;
1087 			hdg_big.heads = heads;
1088 			hdg_big.cylinders = cylinders;
1089 			hdg_big.sectors = sectors;
1090 			error = copyout(&hdg_big, SCARG(uap, data),
1091 			    sizeof hdg_big);
1092 			goto out;
1093 		}
1094 
1095 	default:
1096 		/*
1097 		 * Unknown to us. If it's on a device, just pass it through
1098 		 * using PTIOCLINUX, the device itself might be able to
1099 		 * make some sense of it.
1100 		 * XXX hack: if the function returns EJUSTRETURN,
1101 		 * it has stuffed a sysctl return value in pt.data.
1102 		 */
1103 		ioctlf = fp->f_ops->fo_ioctl;
1104 		pt.com = SCARG(uap, com);
1105 		pt.data = SCARG(uap, data);
1106 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
1107 		if (error == EJUSTRETURN) {
1108 			retval[0] = (register_t)pt.data;
1109 			error = 0;
1110 		}
1111 
1112 		if (error == ENOTTY) {
1113 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1114 			    com));
1115 		}
1116 		goto out;
1117 	}
1118 	SCARG(&bia, com) = com;
1119 	/* XXX NJWLWP */
1120 	error = sys_ioctl(curlwp, &bia, retval);
1121 out:
1122 	FILE_UNUSE(fp ,l);
1123 	return error;
1124 }
1125 
1126 /*
1127  * Set I/O permissions for a process. Just set the maximum level
1128  * right away (ignoring the argument), otherwise we would have
1129  * to rely on I/O permission maps, which are not implemented.
1130  */
1131 int
1132 linux_sys_iopl(l, v, retval)
1133 	struct lwp *l;
1134 	void *v;
1135 	register_t *retval;
1136 {
1137 #if 0
1138 	struct linux_sys_iopl_args /* {
1139 		syscallarg(int) level;
1140 	} */ *uap = v;
1141 #endif
1142 	struct trapframe *fp = l->l_md.md_regs;
1143 
1144 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
1145 	    &l->l_acflag) != 0)
1146 		return EPERM;
1147 	fp->tf_eflags |= PSL_IOPL;
1148 	*retval = 0;
1149 	return 0;
1150 }
1151 
1152 /*
1153  * See above. If a root process tries to set access to an I/O port,
1154  * just let it have the whole range.
1155  */
1156 int
1157 linux_sys_ioperm(l, v, retval)
1158 	struct lwp *l;
1159 	void *v;
1160 	register_t *retval;
1161 {
1162 	struct linux_sys_ioperm_args /* {
1163 		syscallarg(unsigned int) lo;
1164 		syscallarg(unsigned int) hi;
1165 		syscallarg(int) val;
1166 	} */ *uap = v;
1167 	struct trapframe *fp = l->l_md.md_regs;
1168 
1169 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
1170 	    &l->l_acflag) != 0)
1171 		return EPERM;
1172 	if (SCARG(uap, val))
1173 		fp->tf_eflags |= PSL_IOPL;
1174 	*retval = 0;
1175 	return 0;
1176 }
1177 
1178 int
1179 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg)
1180 {
1181 	return 0;
1182 }
1183