xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 23c8222edbfb0f0932d88a8351d3a0cf817dfb9e)
1 /*	$NetBSD: linux_machdep.c,v 1.105 2004/11/13 08:58:23 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.105 2004/11/13 08:58:23 christos Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/sa.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <miscfs/specfs/specdev.h>
72 
73 #include <compat/linux/common/linux_types.h>
74 #include <compat/linux/common/linux_signal.h>
75 #include <compat/linux/common/linux_util.h>
76 #include <compat/linux/common/linux_ioctl.h>
77 #include <compat/linux/common/linux_hdio.h>
78 #include <compat/linux/common/linux_exec.h>
79 #include <compat/linux/common/linux_machdep.h>
80 #include <compat/linux/common/linux_errno.h>
81 
82 #include <compat/linux/linux_syscallargs.h>
83 
84 #include <machine/cpu.h>
85 #include <machine/cpufunc.h>
86 #include <machine/psl.h>
87 #include <machine/reg.h>
88 #include <machine/segments.h>
89 #include <machine/specialreg.h>
90 #include <machine/sysarch.h>
91 #include <machine/vm86.h>
92 #include <machine/vmparam.h>
93 
94 /*
95  * To see whether wscons is configured (for virtual console ioctl calls).
96  */
97 #if defined(_KERNEL_OPT)
98 #include "wsdisplay.h"
99 #endif
100 #if (NWSDISPLAY > 0)
101 #include <dev/wscons/wsconsio.h>
102 #include <dev/wscons/wsdisplay_usl_io.h>
103 #if defined(_KERNEL_OPT)
104 #include "opt_xserver.h"
105 #endif
106 #endif
107 
#ifdef USER_LDT
/* NOTE(review): <machine/cpu.h> is also included unconditionally above. */
#include <machine/cpu.h>
/*
 * Workers for linux_sys_modify_ldt(); they are only compiled, and only
 * called, when the kernel is built with USER_LDT.
 */
int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
#endif
115 
/*
 * Debug output.  With DEBUG_LINUX defined, DPRINTF((fmt, ...)) expands to
 * uprintf(fmt, ...); otherwise it expands to nothing.  The macro takes a
 * single argument, so callers must supply the inner parentheses themselves.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif
121 
/* Look up the BIOS disk information that belongs to an open block device. */
static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
/* Disk list searched by fd2biosinfo(); defined outside this file. */
extern struct disklist *x86_alldisks;
/* Signal frame construction (delivery) and restoration (sigreturn). */
static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    const sigset_t *, struct linux_sigcontext *));
static int linux_restore_sigcontext __P((struct lwp *,
    struct linux_sigcontext *, register_t *));
/* Frame-specific back ends selected by linux_sendsig(). */
static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));

/*
 * Signal trampoline symbols, defined outside this file.  The distance
 * between the two is added to a process's ps_sigcode address to obtain
 * the entry point used for rt signal frames.
 */
extern char linux_sigcode[], linux_rt_sigcode[];
134 /*
135  * Deal with some i386-specific things in the Linux emulation code.
136  */
137 
138 void
139 linux_setregs(l, epp, stack)
140 	struct lwp *l;
141 	struct exec_package *epp;
142 	u_long stack;
143 {
144 	struct pcb *pcb = &l->l_addr->u_pcb;
145 	struct trapframe *tf;
146 
147 #if NNPX > 0
148 	/* If we were using the FPU, forget about it. */
149 	if (npxproc == l)
150 		npxdrop();
151 #endif
152 
153 #ifdef USER_LDT
154 	pmap_ldt_cleanup(l);
155 #endif
156 
157 	l->l_md.md_flags &= ~MDL_USEDFPU;
158 
159 	if (i386_use_fxsave) {
160 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
161 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
162 	} else
163 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
164 
165 	tf = l->l_md.md_regs;
166 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
167 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
168 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
169 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
170 	tf->tf_edi = 0;
171 	tf->tf_esi = 0;
172 	tf->tf_ebp = 0;
173 	tf->tf_ebx = (int)l->l_proc->p_psstr;
174 	tf->tf_edx = 0;
175 	tf->tf_ecx = 0;
176 	tf->tf_eax = 0;
177 	tf->tf_eip = epp->ep_entry;
178 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
179 	tf->tf_eflags = PSL_USERSET;
180 	tf->tf_esp = stack;
181 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
182 }
183 
184 /*
185  * Send an interrupt to process.
186  *
187  * Stack is set up to allow sigcode stored
188  * in u. to call routine, followed by kcall
189  * to sigreturn routine below.  After sigreturn
190  * resets the signal mask, the stack, and the
191  * frame pointer, it returns to the user
192  * specified pc, psl.
193  */
194 
195 void
196 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
197 {
198 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
199 		linux_rt_sendsig(ksi, mask);
200 	else
201 		linux_old_sendsig(ksi, mask);
202 }
203 
204 
205 static void
206 linux_save_ucontext(l, tf, mask, sas, uc)
207 	struct lwp *l;
208 	struct trapframe *tf;
209 	const sigset_t *mask;
210 	struct sigaltstack *sas;
211 	struct linux_ucontext *uc;
212 {
213 	uc->uc_flags = 0;
214 	uc->uc_link = NULL;
215 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
216 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
217 	native_to_linux_sigset(&uc->uc_sigmask, mask);
218 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
219 }
220 
221 static void
222 linux_save_sigcontext(l, tf, mask, sc)
223 	struct lwp *l;
224 	struct trapframe *tf;
225 	const sigset_t *mask;
226 	struct linux_sigcontext *sc;
227 {
228 	/* Save register context. */
229 #ifdef VM86
230 	if (tf->tf_eflags & PSL_VM) {
231 		sc->sc_gs = tf->tf_vm86_gs;
232 		sc->sc_fs = tf->tf_vm86_fs;
233 		sc->sc_es = tf->tf_vm86_es;
234 		sc->sc_ds = tf->tf_vm86_ds;
235 		sc->sc_eflags = get_vflags(l);
236 	} else
237 #endif
238 	{
239 		sc->sc_gs = tf->tf_gs;
240 		sc->sc_fs = tf->tf_fs;
241 		sc->sc_es = tf->tf_es;
242 		sc->sc_ds = tf->tf_ds;
243 		sc->sc_eflags = tf->tf_eflags;
244 	}
245 	sc->sc_edi = tf->tf_edi;
246 	sc->sc_esi = tf->tf_esi;
247 	sc->sc_esp = tf->tf_esp;
248 	sc->sc_ebp = tf->tf_ebp;
249 	sc->sc_ebx = tf->tf_ebx;
250 	sc->sc_edx = tf->tf_edx;
251 	sc->sc_ecx = tf->tf_ecx;
252 	sc->sc_eax = tf->tf_eax;
253 	sc->sc_eip = tf->tf_eip;
254 	sc->sc_cs = tf->tf_cs;
255 	sc->sc_esp_at_signal = tf->tf_esp;
256 	sc->sc_ss = tf->tf_ss;
257 	sc->sc_err = tf->tf_err;
258 	sc->sc_trapno = tf->tf_trapno;
259 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
260 	sc->sc_387 = NULL;
261 
262 	/* Save signal stack. */
263 	/* Linux doesn't save the onstack flag in sigframe */
264 
265 	/* Save signal mask. */
266 	native_to_linux_old_sigset(&sc->sc_mask, mask);
267 }
268 
269 static void
270 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
271 {
272 	struct lwp *l = curlwp;
273 	struct proc *p = l->l_proc;
274 	struct trapframe *tf;
275 	struct linux_rt_sigframe *fp, frame;
276 	int onstack;
277 	linux_siginfo_t *lsi;
278 	int sig = ksi->ksi_signo;
279 	sig_t catcher = SIGACTION(p, sig).sa_handler;
280 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
281 
282 	tf = l->l_md.md_regs;
283 	/* Do we need to jump onto the signal stack? */
284 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
285 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
286 
287 
288 	/* Allocate space for the signal handler context. */
289 	if (onstack)
290 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
291 		    sas->ss_size);
292 	else
293 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
294 	fp--;
295 
296 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
297 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
298 
299 	/* Build stack frame for signal trampoline. */
300 	frame.sf_handler = catcher;
301 	frame.sf_sig = native_to_linux_signo[sig];
302 	frame.sf_sip = &fp->sf_si;
303 	frame.sf_ucp = &fp->sf_uc;
304 
305 	/*
306 	 * XXX: the following code assumes that the constants for
307 	 * siginfo are the same between linux and NetBSD.
308 	 */
309 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
310 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
311 	lsi->lsi_code = ksi->ksi_code;
312 	switch (lsi->lsi_signo = frame.sf_sig) {
313 	case LINUX_SIGILL:
314 	case LINUX_SIGFPE:
315 	case LINUX_SIGSEGV:
316 	case LINUX_SIGBUS:
317 	case LINUX_SIGTRAP:
318 		lsi->lsi_addr = ksi->ksi_addr;
319 		break;
320 	case LINUX_SIGCHLD:
321 		lsi->lsi_uid = ksi->ksi_uid;
322 		lsi->lsi_pid = ksi->ksi_pid;
323 		lsi->lsi_status = ksi->ksi_status;
324 		lsi->lsi_utime = ksi->ksi_utime;
325 		lsi->lsi_stime = ksi->ksi_stime;
326 		break;
327 	case LINUX_SIGIO:
328 		lsi->lsi_band = ksi->ksi_band;
329 		lsi->lsi_fd = ksi->ksi_fd;
330 		break;
331 	default:
332 		lsi->lsi_uid = ksi->ksi_uid;
333 		lsi->lsi_pid = ksi->ksi_pid;
334 		if (lsi->lsi_signo == LINUX_SIGALRM ||
335 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
336 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
337 		break;
338 	}
339 
340 	/* Save register context. */
341 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
342 
343 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
344 		/*
345 		 * Process has trashed its stack; give it an illegal
346 		 * instruction to halt it in its tracks.
347 		 */
348 		sigexit(l, SIGILL);
349 		/* NOTREACHED */
350 	}
351 
352 	/*
353 	 * Build context to run handler in.
354 	 */
355 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
356 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
357 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
358 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
359 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
360 	    (linux_rt_sigcode - linux_sigcode);
361 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
362 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
363 	tf->tf_esp = (int)fp;
364 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
365 
366 	/* Remember that we're now on the signal stack. */
367 	if (onstack)
368 		sas->ss_flags |= SS_ONSTACK;
369 }
370 
371 static void
372 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
373 {
374 	struct lwp *l = curlwp;
375 	struct proc *p = l->l_proc;
376 	struct trapframe *tf;
377 	struct linux_sigframe *fp, frame;
378 	int onstack;
379 	int sig = ksi->ksi_signo;
380 	sig_t catcher = SIGACTION(p, sig).sa_handler;
381 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
382 
383 	tf = l->l_md.md_regs;
384 
385 	/* Do we need to jump onto the signal stack? */
386 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
387 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
388 
389 	/* Allocate space for the signal handler context. */
390 	if (onstack)
391 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
392 		    sas->ss_size);
393 	else
394 		fp = (struct linux_sigframe *)tf->tf_esp;
395 	fp--;
396 
397 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
398 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
399 
400 	/* Build stack frame for signal trampoline. */
401 	frame.sf_handler = catcher;
402 	frame.sf_sig = native_to_linux_signo[sig];
403 
404 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
405 
406 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
407 		/*
408 		 * Process has trashed its stack; give it an illegal
409 		 * instruction to halt it in its tracks.
410 		 */
411 		sigexit(l, SIGILL);
412 		/* NOTREACHED */
413 	}
414 
415 	/*
416 	 * Build context to run handler in.
417 	 */
418 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
419 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
420 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
421 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
422 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
423 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
424 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
425 	tf->tf_esp = (int)fp;
426 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
427 
428 	/* Remember that we're now on the signal stack. */
429 	if (onstack)
430 		sas->ss_flags |= SS_ONSTACK;
431 }
432 
433 /*
434  * System call to cleanup state after a signal
435  * has been taken.  Reset signal mask and
436  * stack state from context left by sendsig (above).
437  * Return to previous pc and psl as specified by
438  * context left by sendsig. Check carefully to
439  * make sure that the user has not modified the
440  * psl to gain improper privileges or to cause
441  * a machine fault.
442  */
443 int
444 linux_sys_rt_sigreturn(l, v, retval)
445 	struct lwp *l;
446 	void *v;
447 	register_t *retval;
448 {
449 	struct linux_sys_rt_sigreturn_args /* {
450 		syscallarg(struct linux_ucontext *) ucp;
451 	} */ *uap = v;
452 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
453 	int error;
454 
455 	/*
456 	 * The trampoline code hands us the context.
457 	 * It is unsafe to keep track of it ourselves, in the event that a
458 	 * program jumps out of a signal handler.
459 	 */
460 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
461 		return error;
462 
463 	/* XXX XAX we can do better here by using more of the ucontext */
464 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
465 }
466 
467 int
468 linux_sys_sigreturn(l, v, retval)
469 	struct lwp *l;
470 	void *v;
471 	register_t *retval;
472 {
473 	struct linux_sys_sigreturn_args /* {
474 		syscallarg(struct linux_sigcontext *) scp;
475 	} */ *uap = v;
476 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
477 	int error;
478 
479 	/*
480 	 * The trampoline code hands us the context.
481 	 * It is unsafe to keep track of it ourselves, in the event that a
482 	 * program jumps out of a signal handler.
483 	 */
484 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
485 		return error;
486 	return linux_restore_sigcontext(l, &context, retval);
487 }
488 
489 static int
490 linux_restore_sigcontext(l, scp, retval)
491 	struct lwp *l;
492 	struct linux_sigcontext *scp;
493 	register_t *retval;
494 {
495 	struct proc *p = l->l_proc;
496 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
497 	struct trapframe *tf;
498 	sigset_t mask;
499 	ssize_t ss_gap;
500 	/* Restore register context. */
501 	tf = l->l_md.md_regs;
502 
503 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
504 #ifdef VM86
505 	if (scp->sc_eflags & PSL_VM) {
506 		void syscall_vm86 __P((struct trapframe *));
507 
508 		tf->tf_vm86_gs = scp->sc_gs;
509 		tf->tf_vm86_fs = scp->sc_fs;
510 		tf->tf_vm86_es = scp->sc_es;
511 		tf->tf_vm86_ds = scp->sc_ds;
512 		set_vflags(l, scp->sc_eflags);
513 		p->p_md.md_syscall = syscall_vm86;
514 	} else
515 #endif
516 	{
517 		/*
518 		 * Check for security violations.  If we're returning to
519 		 * protected mode, the CPU will validate the segment registers
520 		 * automatically and generate a trap on violations.  We handle
521 		 * the trap, rather than doing all of the checking here.
522 		 */
523 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
524 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
525 			return EINVAL;
526 
527 		tf->tf_gs = scp->sc_gs;
528 		tf->tf_fs = scp->sc_fs;
529 		tf->tf_es = scp->sc_es;
530 		tf->tf_ds = scp->sc_ds;
531 #ifdef VM86
532 		if (tf->tf_eflags & PSL_VM)
533 			(*p->p_emul->e_syscall_intern)(p);
534 #endif
535 		tf->tf_eflags = scp->sc_eflags;
536 	}
537 	tf->tf_edi = scp->sc_edi;
538 	tf->tf_esi = scp->sc_esi;
539 	tf->tf_ebp = scp->sc_ebp;
540 	tf->tf_ebx = scp->sc_ebx;
541 	tf->tf_edx = scp->sc_edx;
542 	tf->tf_ecx = scp->sc_ecx;
543 	tf->tf_eax = scp->sc_eax;
544 	tf->tf_eip = scp->sc_eip;
545 	tf->tf_cs = scp->sc_cs;
546 	tf->tf_esp = scp->sc_esp_at_signal;
547 	tf->tf_ss = scp->sc_ss;
548 
549 	/* Restore signal stack. */
550 	/*
551 	 * Linux really does it this way; it doesn't have space in sigframe
552 	 * to save the onstack flag.
553 	 */
554 	ss_gap = (ssize_t)
555 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
556 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
557 		sas->ss_flags |= SS_ONSTACK;
558 	else
559 		sas->ss_flags &= ~SS_ONSTACK;
560 
561 	/* Restore signal mask. */
562 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
563 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
564 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
565 	return EJUSTRETURN;
566 }
567 
568 #ifdef USER_LDT
569 
570 int
571 linux_read_ldt(l, uap, retval)
572 	struct lwp *l;
573 	struct linux_sys_modify_ldt_args /* {
574 		syscallarg(int) func;
575 		syscallarg(void *) ptr;
576 		syscallarg(size_t) bytecount;
577 	} */ *uap;
578 	register_t *retval;
579 {
580 	struct proc *p = l->l_proc;
581 	struct i386_get_ldt_args gl;
582 	int error;
583 	caddr_t sg;
584 	char *parms;
585 
586 	DPRINTF(("linux_read_ldt!"));
587 	sg = stackgap_init(p, 0);
588 
589 	gl.start = 0;
590 	gl.desc = SCARG(uap, ptr);
591 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
592 
593 	parms = stackgap_alloc(p, &sg, sizeof(gl));
594 
595 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
596 		return (error);
597 
598 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
599 		return (error);
600 
601 	*retval *= sizeof(union descriptor);
602 	return (0);
603 }
604 
/*
 * Descriptor description handed in by the process for the "write"
 * functions of linux_sys_modify_ldt(); linux_write_ldt() copies one in
 * and translates it into a struct segment_descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set; must be < 8192 */
	u_long base_addr;		/* segment base (sd_lobase/sd_hibase) */
	u_int limit;			/* segment limit (sd_lolimit/sd_hilimit) */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into sd_type; 3 is restricted */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx, new mode only */
};
616 
617 int
618 linux_write_ldt(l, uap, retval)
619 	struct lwp *l;
620 	struct linux_sys_modify_ldt_args /* {
621 		syscallarg(int) func;
622 		syscallarg(void *) ptr;
623 		syscallarg(size_t) bytecount;
624 	} */ *uap;
625 	register_t *retval;
626 {
627 	struct proc *p = l->l_proc;
628 	struct linux_ldt_info ldt_info;
629 	struct segment_descriptor sd;
630 	struct i386_set_ldt_args sl;
631 	int error;
632 	caddr_t sg;
633 	char *parms;
634 	int oldmode = (int)retval[0];
635 
636 	DPRINTF(("linux_write_ldt %d\n", oldmode));
637 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
638 		return (EINVAL);
639 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
640 		return error;
641 	if (ldt_info.entry_number >= 8192)
642 		return (EINVAL);
643 	if (ldt_info.contents == 3) {
644 		if (oldmode)
645 			return (EINVAL);
646 		if (ldt_info.seg_not_present)
647 			return (EINVAL);
648 	}
649 
650 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
651 	    (oldmode || (ldt_info.contents == 0 &&
652 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
653 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
654 	    ldt_info.useable == 0))) {
655 		/* this means you should zero the ldt */
656 		(void)memset(&sd, 0, sizeof(sd));
657 	} else {
658 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
659 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
660 		sd.sd_lolimit = ldt_info.limit & 0xffff;
661 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
662 		sd.sd_type = 16 | (ldt_info.contents << 2) |
663 		    (!ldt_info.read_exec_only << 1);
664 		sd.sd_dpl = SEL_UPL;
665 		sd.sd_p = !ldt_info.seg_not_present;
666 		sd.sd_def32 = ldt_info.seg_32bit;
667 		sd.sd_gran = ldt_info.limit_in_pages;
668 		if (!oldmode)
669 			sd.sd_xx = ldt_info.useable;
670 		else
671 			sd.sd_xx = 0;
672 	}
673 	sg = stackgap_init(p, 0);
674 	sl.start = ldt_info.entry_number;
675 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
676 	sl.num = 1;
677 
678 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
679 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
680 
681 	parms = stackgap_alloc(p, &sg, sizeof(sl));
682 
683 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
684 		return (error);
685 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
686 		return (error);
687 
688 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
689 		return (error);
690 
691 	*retval = 0;
692 	return (0);
693 }
694 
695 #endif /* USER_LDT */
696 
697 int
698 linux_sys_modify_ldt(l, v, retval)
699 	struct lwp *l;
700 	void *v;
701 	register_t *retval;
702 {
703 	struct linux_sys_modify_ldt_args /* {
704 		syscallarg(int) func;
705 		syscallarg(void *) ptr;
706 		syscallarg(size_t) bytecount;
707 	} */ *uap = v;
708 
709 	switch (SCARG(uap, func)) {
710 #ifdef USER_LDT
711 	case 0:
712 		return linux_read_ldt(l, uap, retval);
713 	case 1:
714 		retval[0] = 1;
715 		return linux_write_ldt(l, uap, retval);
716 	case 2:
717 #ifdef notyet
718 		return (linux_read_default_ldt(l, uap, retval);
719 #else
720 		return (ENOSYS);
721 #endif
722 	case 0x11:
723 		retval[0] = 0;
724 		return linux_write_ldt(l, uap, retval);
725 #endif /* USER_LDT */
726 
727 	default:
728 		return (ENOSYS);
729 	}
730 }
731 
732 /*
733  * XXX Pathetic hack to make svgalib work. This will fake the major
734  * device number of an opened VT so that svgalib likes it. grmbl.
735  * Should probably do it 'wrong the right way' and use a mapping
736  * array for all major device numbers, and map linux_mknod too.
737  */
738 dev_t
739 linux_fakedev(dev, raw)
740 	dev_t dev;
741 	int raw;
742 {
743 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
744 	const struct cdevsw *cd = cdevsw_lookup(dev);
745 
746 	if (raw) {
747 #if (NWSDISPLAY > 0)
748 		extern const struct cdevsw wsdisplay_cdevsw;
749 		if (cd == &wsdisplay_cdevsw)
750 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
751 #endif
752 	}
753 
754 	if (cd == &ptc_cdevsw)
755 		return makedev(LINUX_PTC_MAJOR, minor(dev));
756 	if (cd == &pts_cdevsw)
757 		return makedev(LINUX_PTS_MAJOR, minor(dev));
758 
759 	return dev;
760 }
761 
#if (NWSDISPLAY > 0)
/*
 * Fixed keyboard translation tables returned by the LINUX_KDGKBENT ioctl,
 * which looks an entry up as linux_keytabs[kb_table][kb_index].
 *
 * That's not complete, but enough to get an X server running.
 *
 * NOTE(review): the 16-bit entries are presumably Linux keymap values
 * (type in the high byte, value in the low byte) -- confirm against the
 * Linux keyboard headers before editing individual entries.
 */
#define NR_KEYS 128	/* entries per table; upper bound for kb_index */
static const u_short plain_map[NR_KEYS] = {	/* linux_keytabs[0] */
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {	/* linux_keytabs[1] */
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {	/* linux_keytabs[2] and [3] */
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {	/* linux_keytabs[4] */
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table selector for LINUX_KDGKBENT (kb_table).  altgr_map is listed
 * twice on purpose or by omission -- NOTE(review): slot 3 presumably
 * stands for a shift+altgr table that has no data of its own; confirm.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
#endif
841 
842 static struct biosdisk_info *
843 fd2biosinfo(p, fp)
844 	struct proc *p;
845 	struct file *fp;
846 {
847 	struct vnode *vp;
848 	const char *blkname;
849 	char diskname[16];
850 	int i;
851 	struct nativedisk_info *nip;
852 	struct disklist *dl = x86_alldisks;
853 
854 	if (fp->f_type != DTYPE_VNODE)
855 		return NULL;
856 	vp = (struct vnode *)fp->f_data;
857 
858 	if (vp->v_type != VBLK)
859 		return NULL;
860 
861 	blkname = devsw_blk2name(major(vp->v_rdev));
862 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
863 	    DISKUNIT(vp->v_rdev));
864 
865 	for (i = 0; i < dl->dl_nnativedisks; i++) {
866 		nip = &dl->dl_nativedisks[i];
867 		if (strcmp(diskname, nip->ni_devname))
868 			continue;
869 		if (nip->ni_nmatches != 0)
870 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
871 	}
872 
873 	return NULL;
874 }
875 
876 
877 /*
878  * We come here in a last attempt to satisfy a Linux ioctl() call
879  */
880 int
881 linux_machdepioctl(p, v, retval)
882 	struct proc *p;
883 	void *v;
884 	register_t *retval;
885 {
886 	struct linux_sys_ioctl_args /* {
887 		syscallarg(int) fd;
888 		syscallarg(u_long) com;
889 		syscallarg(caddr_t) data;
890 	} */ *uap = v;
891 	struct sys_ioctl_args bia;
892 	u_long com;
893 	int error, error1;
894 #if (NWSDISPLAY > 0)
895 	struct vt_mode lvt;
896 	caddr_t bvtp, sg;
897 	struct kbentry kbe;
898 #endif
899 	struct linux_hd_geometry hdg;
900 	struct linux_hd_big_geometry hdg_big;
901 	struct biosdisk_info *bip;
902 	struct filedesc *fdp;
903 	struct file *fp;
904 	int fd;
905 	struct disklabel label, *labp;
906 	struct partinfo partp;
907 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
908 	u_long start, biostotal, realtotal;
909 	u_char heads, sectors;
910 	u_int cylinders;
911 	struct ioctl_pt pt;
912 
913 	fd = SCARG(uap, fd);
914 	SCARG(&bia, fd) = fd;
915 	SCARG(&bia, data) = SCARG(uap, data);
916 	com = SCARG(uap, com);
917 
918 	fdp = p->p_fd;
919 
920 	if ((fp = fd_getfile(fdp, fd)) == NULL)
921 		return (EBADF);
922 
923 	FILE_USE(fp);
924 
925 	switch (com) {
926 #if (NWSDISPLAY > 0)
927 	case LINUX_KDGKBMODE:
928 		com = KDGKBMODE;
929 		break;
930 	case LINUX_KDSKBMODE:
931 		com = KDSKBMODE;
932 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
933 			SCARG(&bia, data) = (caddr_t)K_RAW;
934 		break;
935 	case LINUX_KIOCSOUND:
936 		SCARG(&bia, data) =
937 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
938 		/* fall through */
939 	case LINUX_KDMKTONE:
940 		com = KDMKTONE;
941 		break;
942 	case LINUX_KDSETMODE:
943 		com = KDSETMODE;
944 		break;
945 	case LINUX_KDGETMODE:
946 		/* KD_* values are equal to the wscons numbers */
947 		com = WSDISPLAYIO_GMODE;
948 		break;
949 	case LINUX_KDENABIO:
950 		com = KDENABIO;
951 		break;
952 	case LINUX_KDDISABIO:
953 		com = KDDISABIO;
954 		break;
955 	case LINUX_KDGETLED:
956 		com = KDGETLED;
957 		break;
958 	case LINUX_KDSETLED:
959 		com = KDSETLED;
960 		break;
961 	case LINUX_VT_OPENQRY:
962 		com = VT_OPENQRY;
963 		break;
964 	case LINUX_VT_GETMODE:
965 		SCARG(&bia, com) = VT_GETMODE;
966 		/* XXX NJWLWP */
967 		if ((error = sys_ioctl(curlwp, &bia, retval)))
968 			goto out;
969 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
970 		    sizeof (struct vt_mode))))
971 			goto out;
972 		lvt.relsig = native_to_linux_signo[lvt.relsig];
973 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
974 		lvt.frsig = native_to_linux_signo[lvt.frsig];
975 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
976 		    sizeof (struct vt_mode));
977 		goto out;
978 	case LINUX_VT_SETMODE:
979 		com = VT_SETMODE;
980 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
981 		    sizeof (struct vt_mode))))
982 			goto out;
983 		lvt.relsig = linux_to_native_signo[lvt.relsig];
984 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
985 		lvt.frsig = linux_to_native_signo[lvt.frsig];
986 		sg = stackgap_init(p, 0);
987 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
988 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
989 			goto out;
990 		SCARG(&bia, data) = bvtp;
991 		break;
992 	case LINUX_VT_DISALLOCATE:
993 		/* XXX should use WSDISPLAYIO_DELSCREEN */
994 		error = 0;
995 		goto out;
996 	case LINUX_VT_RELDISP:
997 		com = VT_RELDISP;
998 		break;
999 	case LINUX_VT_ACTIVATE:
1000 		com = VT_ACTIVATE;
1001 		break;
1002 	case LINUX_VT_WAITACTIVE:
1003 		com = VT_WAITACTIVE;
1004 		break;
1005 	case LINUX_VT_GETSTATE:
1006 		com = VT_GETSTATE;
1007 		break;
1008 	case LINUX_KDGKBTYPE:
1009 	    {
1010 		static const u_int8_t kb101 = KB_101;
1011 
1012 		/* This is what Linux does. */
1013 		error = copyout(&kb101, SCARG(uap, data), 1);
1014 		goto out;
1015 	    }
1016 	case LINUX_KDGKBENT:
1017 		/*
1018 		 * The Linux KDGKBENT ioctl is different from the
1019 		 * SYSV original. So we handle it in machdep code.
1020 		 * XXX We should use keyboard mapping information
1021 		 * from wsdisplay, but this would be expensive.
1022 		 */
1023 		if ((error = copyin(SCARG(uap, data), &kbe,
1024 				    sizeof(struct kbentry))))
1025 			goto out;
1026 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
1027 		    || kbe.kb_index >= NR_KEYS) {
1028 			error = EINVAL;
1029 			goto out;
1030 		}
1031 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
1032 		error = copyout(&kbe, SCARG(uap, data),
1033 				sizeof(struct kbentry));
1034 		goto out;
1035 #endif
1036 	case LINUX_HDIO_GETGEO:
1037 	case LINUX_HDIO_GETGEO_BIG:
1038 		/*
1039 		 * Try to mimic Linux behaviour: return the BIOS geometry
1040 		 * if possible (extending its # of cylinders if it's beyond
1041 		 * the 1023 limit), fall back to the MI geometry (i.e.
1042 		 * the real geometry) if not found, by returning an
1043 		 * error. See common/linux_hdio.c
1044 		 */
1045 		bip = fd2biosinfo(p, fp);
1046 		ioctlf = fp->f_ops->fo_ioctl;
1047 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
1048 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
1049 		if (error != 0 && error1 != 0) {
1050 			error = error1;
1051 			goto out;
1052 		}
1053 		labp = error != 0 ? &label : partp.disklab;
1054 		start = error1 != 0 ? partp.part->p_offset : 0;
1055 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1056 		    && bip->bi_cyl != 0) {
1057 			heads = bip->bi_head;
1058 			sectors = bip->bi_sec;
1059 			cylinders = bip->bi_cyl;
1060 			biostotal = heads * sectors * cylinders;
1061 			realtotal = labp->d_ntracks * labp->d_nsectors *
1062 			    labp->d_ncylinders;
1063 			if (realtotal > biostotal)
1064 				cylinders = realtotal / (heads * sectors);
1065 		} else {
1066 			heads = labp->d_ntracks;
1067 			cylinders = labp->d_ncylinders;
1068 			sectors = labp->d_nsectors;
1069 		}
1070 		if (com == LINUX_HDIO_GETGEO) {
1071 			hdg.start = start;
1072 			hdg.heads = heads;
1073 			hdg.cylinders = cylinders;
1074 			hdg.sectors = sectors;
1075 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1076 			goto out;
1077 		} else {
1078 			hdg_big.start = start;
1079 			hdg_big.heads = heads;
1080 			hdg_big.cylinders = cylinders;
1081 			hdg_big.sectors = sectors;
1082 			error = copyout(&hdg_big, SCARG(uap, data),
1083 			    sizeof hdg_big);
1084 			goto out;
1085 		}
1086 
1087 	default:
1088 		/*
1089 		 * Unknown to us. If it's on a device, just pass it through
1090 		 * using PTIOCLINUX, the device itself might be able to
1091 		 * make some sense of it.
1092 		 * XXX hack: if the function returns EJUSTRETURN,
1093 		 * it has stuffed a sysctl return value in pt.data.
1094 		 */
1095 		FILE_USE(fp);
1096 		ioctlf = fp->f_ops->fo_ioctl;
1097 		pt.com = SCARG(uap, com);
1098 		pt.data = SCARG(uap, data);
1099 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
1100 		FILE_UNUSE(fp, p);
1101 		if (error == EJUSTRETURN) {
1102 			retval[0] = (register_t)pt.data;
1103 			error = 0;
1104 		}
1105 
1106 		if (error == ENOTTY)
1107 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1108 			    com));
1109 		goto out;
1110 	}
1111 	SCARG(&bia, com) = com;
1112 	/* XXX NJWLWP */
1113 	error = sys_ioctl(curlwp, &bia, retval);
1114 out:
1115 	FILE_UNUSE(fp ,p);
1116 	return error;
1117 }
1118 
1119 /*
1120  * Set I/O permissions for a process. Just set the maximum level
1121  * right away (ignoring the argument), otherwise we would have
1122  * to rely on I/O permission maps, which are not implemented.
1123  */
1124 int
1125 linux_sys_iopl(l, v, retval)
1126 	struct lwp *l;
1127 	void *v;
1128 	register_t *retval;
1129 {
1130 #if 0
1131 	struct linux_sys_iopl_args /* {
1132 		syscallarg(int) level;
1133 	} */ *uap = v;
1134 #endif
1135 	struct proc *p = l->l_proc;
1136 	struct trapframe *fp = l->l_md.md_regs;
1137 
1138 	if (suser(p->p_ucred, &p->p_acflag) != 0)
1139 		return EPERM;
1140 	fp->tf_eflags |= PSL_IOPL;
1141 	*retval = 0;
1142 	return 0;
1143 }
1144 
1145 /*
1146  * See above. If a root process tries to set access to an I/O port,
1147  * just let it have the whole range.
1148  */
1149 int
1150 linux_sys_ioperm(l, v, retval)
1151 	struct lwp *l;
1152 	void *v;
1153 	register_t *retval;
1154 {
1155 	struct linux_sys_ioperm_args /* {
1156 		syscallarg(unsigned int) lo;
1157 		syscallarg(unsigned int) hi;
1158 		syscallarg(int) val;
1159 	} */ *uap = v;
1160 	struct proc *p = l->l_proc;
1161 	struct trapframe *fp = l->l_md.md_regs;
1162 
1163 	if (suser(p->p_ucred, &p->p_acflag) != 0)
1164 		return EPERM;
1165 	if (SCARG(uap, val))
1166 		fp->tf_eflags |= PSL_IOPL;
1167 	*retval = 0;
1168 	return 0;
1169 }
1170