xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision f648d12d47727113ad5330b0753bb2f2ef8e1045)
1 /*	$NetBSD: linux_machdep.c,v 1.102 2004/01/28 10:48:55 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.102 2004/01/28 10:48:55 yamt Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/sa.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <miscfs/specfs/specdev.h>
72 
73 #include <compat/linux/common/linux_types.h>
74 #include <compat/linux/common/linux_signal.h>
75 #include <compat/linux/common/linux_util.h>
76 #include <compat/linux/common/linux_ioctl.h>
77 #include <compat/linux/common/linux_hdio.h>
78 #include <compat/linux/common/linux_exec.h>
79 #include <compat/linux/common/linux_machdep.h>
80 #include <compat/linux/common/linux_errno.h>
81 
82 #include <compat/linux/linux_syscallargs.h>
83 
84 #include <machine/cpu.h>
85 #include <machine/cpufunc.h>
86 #include <machine/psl.h>
87 #include <machine/reg.h>
88 #include <machine/segments.h>
89 #include <machine/specialreg.h>
90 #include <machine/sysarch.h>
91 #include <machine/vm86.h>
92 #include <machine/vmparam.h>
93 
94 /*
95  * To see whether wscons is configured (for virtual console ioctl calls).
96  */
97 #if defined(_KERNEL_OPT)
98 #include "wsdisplay.h"
99 #endif
100 #if (NWSDISPLAY > 0)
101 #include <dev/wscons/wsconsio.h>
102 #include <dev/wscons/wsdisplay_usl_io.h>
103 #if defined(_KERNEL_OPT)
104 #include "opt_xserver.h"
105 #endif
106 #endif
107 
/*
 * Back ends of linux_sys_modify_ldt(); they are only compiled when the
 * kernel is configured with USER_LDT.
 */
#ifdef USER_LDT
#include <machine/cpu.h>
int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
#endif

/*
 * Debug tracing.  With DEBUG_LINUX defined, DPRINTF((fmt, ...)) expands to
 * uprintf(fmt, ...); otherwise it expands to nothing.  The argument must be
 * a complete, parenthesised argument list, hence the double parentheses at
 * every call site.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/* Forward declarations of the file-local helpers defined below. */
static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
/* Disk list consulted by fd2biosinfo(); defined outside this file. */
extern struct disklist *i386_alldisks;
static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    const sigset_t *, struct linux_sigcontext *));
static int linux_restore_sigcontext __P((struct lwp *,
    struct linux_sigcontext *, register_t *));
static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));

/*
 * Signal trampolines.  The distance between the two symbols is used to
 * locate the rt trampoline relative to the process' ps_sigcode address.
 */
extern char linux_sigcode[], linux_rt_sigcode[];
134 /*
135  * Deal with some i386-specific things in the Linux emulation code.
136  */
137 
138 void
139 linux_setregs(l, epp, stack)
140 	struct lwp *l;
141 	struct exec_package *epp;
142 	u_long stack;
143 {
144 	struct pcb *pcb = &l->l_addr->u_pcb;
145 	struct trapframe *tf;
146 
147 #if NNPX > 0
148 	/* If we were using the FPU, forget about it. */
149 	if (npxproc == l)
150 		npxdrop();
151 #endif
152 
153 #ifdef USER_LDT
154 	pmap_ldt_cleanup(l);
155 #endif
156 
157 	l->l_md.md_flags &= ~MDL_USEDFPU;
158 
159 	if (i386_use_fxsave) {
160 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
161 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
162 	} else
163 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
164 
165 	tf = l->l_md.md_regs;
166 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
167 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
168 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
169 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
170 	tf->tf_edi = 0;
171 	tf->tf_esi = 0;
172 	tf->tf_ebp = 0;
173 	tf->tf_ebx = (int)l->l_proc->p_psstr;
174 	tf->tf_edx = 0;
175 	tf->tf_ecx = 0;
176 	tf->tf_eax = 0;
177 	tf->tf_eip = epp->ep_entry;
178 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
179 	tf->tf_eflags = PSL_USERSET;
180 	tf->tf_esp = stack;
181 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
182 }
183 
184 /*
185  * Send an interrupt to process.
186  *
187  * Stack is set up to allow sigcode stored
188  * in u. to call routine, followed by kcall
189  * to sigreturn routine below.  After sigreturn
190  * resets the signal mask, the stack, and the
191  * frame pointer, it returns to the user
192  * specified pc, psl.
193  */
194 
195 void
196 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
197 {
198 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
199 		linux_rt_sendsig(ksi, mask);
200 	else
201 		linux_old_sendsig(ksi, mask);
202 }
203 
204 
205 static void
206 linux_save_ucontext(l, tf, mask, sas, uc)
207 	struct lwp *l;
208 	struct trapframe *tf;
209 	const sigset_t *mask;
210 	struct sigaltstack *sas;
211 	struct linux_ucontext *uc;
212 {
213 	uc->uc_flags = 0;
214 	uc->uc_link = NULL;
215 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
216 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
217 	native_to_linux_sigset(&uc->uc_sigmask, mask);
218 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
219 }
220 
221 static void
222 linux_save_sigcontext(l, tf, mask, sc)
223 	struct lwp *l;
224 	struct trapframe *tf;
225 	const sigset_t *mask;
226 	struct linux_sigcontext *sc;
227 {
228 	/* Save register context. */
229 #ifdef VM86
230 	if (tf->tf_eflags & PSL_VM) {
231 		sc->sc_gs = tf->tf_vm86_gs;
232 		sc->sc_fs = tf->tf_vm86_fs;
233 		sc->sc_es = tf->tf_vm86_es;
234 		sc->sc_ds = tf->tf_vm86_ds;
235 		sc->sc_eflags = get_vflags(l);
236 	} else
237 #endif
238 	{
239 		sc->sc_gs = tf->tf_gs;
240 		sc->sc_fs = tf->tf_fs;
241 		sc->sc_es = tf->tf_es;
242 		sc->sc_ds = tf->tf_ds;
243 		sc->sc_eflags = tf->tf_eflags;
244 	}
245 	sc->sc_edi = tf->tf_edi;
246 	sc->sc_esi = tf->tf_esi;
247 	sc->sc_esp = tf->tf_esp;
248 	sc->sc_ebp = tf->tf_ebp;
249 	sc->sc_ebx = tf->tf_ebx;
250 	sc->sc_edx = tf->tf_edx;
251 	sc->sc_ecx = tf->tf_ecx;
252 	sc->sc_eax = tf->tf_eax;
253 	sc->sc_eip = tf->tf_eip;
254 	sc->sc_cs = tf->tf_cs;
255 	sc->sc_esp_at_signal = tf->tf_esp;
256 	sc->sc_ss = tf->tf_ss;
257 	sc->sc_err = tf->tf_err;
258 	sc->sc_trapno = tf->tf_trapno;
259 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
260 	sc->sc_387 = NULL;
261 
262 	/* Save signal stack. */
263 	/* Linux doesn't save the onstack flag in sigframe */
264 
265 	/* Save signal mask. */
266 	native_to_linux_old_sigset(&sc->sc_mask, mask);
267 }
268 
269 static void
270 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
271 {
272 	struct lwp *l = curlwp;
273 	struct proc *p = l->l_proc;
274 	struct trapframe *tf;
275 	struct linux_rt_sigframe *fp, frame;
276 	int onstack;
277 	linux_siginfo_t *lsi;
278 	int sig = ksi->ksi_signo;
279 	sig_t catcher = SIGACTION(p, sig).sa_handler;
280 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
281 
282 	tf = l->l_md.md_regs;
283 	/* Do we need to jump onto the signal stack? */
284 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
285 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
286 
287 
288 	/* Allocate space for the signal handler context. */
289 	if (onstack)
290 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
291 		    sas->ss_size);
292 	else
293 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
294 	fp--;
295 
296 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
297 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
298 
299 	/* Build stack frame for signal trampoline. */
300 	frame.sf_handler = catcher;
301 	frame.sf_sig = native_to_linux_signo[sig];
302 	frame.sf_sip = &fp->sf_si;
303 	frame.sf_ucp = &fp->sf_uc;
304 
305 	/*
306 	 * XXX: the following code assumes that the constants for
307 	 * siginfo are the same between linux and NetBSD.
308 	 */
309 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
310 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
311 	lsi->lsi_code = ksi->ksi_code;
312 	switch (lsi->lsi_signo = frame.sf_sig) {
313 	case LINUX_SIGILL:
314 	case LINUX_SIGFPE:
315 	case LINUX_SIGSEGV:
316 	case LINUX_SIGBUS:
317 	case LINUX_SIGTRAP:
318 		lsi->lsi_addr = ksi->ksi_addr;
319 		break;
320 	case LINUX_SIGCHLD:
321 		lsi->lsi_uid = ksi->ksi_uid;
322 		lsi->lsi_pid = ksi->ksi_pid;
323 		lsi->lsi_status = ksi->ksi_status;
324 		lsi->lsi_utime = ksi->ksi_utime;
325 		lsi->lsi_stime = ksi->ksi_stime;
326 		break;
327 	case LINUX_SIGIO:
328 		lsi->lsi_band = ksi->ksi_band;
329 		lsi->lsi_fd = ksi->ksi_fd;
330 		break;
331 	default:
332 		lsi->lsi_uid = ksi->ksi_uid;
333 		lsi->lsi_pid = ksi->ksi_pid;
334 		if (lsi->lsi_signo == LINUX_SIGALRM ||
335 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
336 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
337 		break;
338 	}
339 
340 	/* Save register context. */
341 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
342 
343 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
344 		/*
345 		 * Process has trashed its stack; give it an illegal
346 		 * instruction to halt it in its tracks.
347 		 */
348 		sigexit(l, SIGILL);
349 		/* NOTREACHED */
350 	}
351 
352 	/*
353 	 * Build context to run handler in.
354 	 */
355 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
356 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
357 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
358 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
359 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
360 	    (linux_rt_sigcode - linux_sigcode);
361 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
362 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
363 	tf->tf_esp = (int)fp;
364 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
365 
366 	/* Remember that we're now on the signal stack. */
367 	if (onstack)
368 		sas->ss_flags |= SS_ONSTACK;
369 }
370 
371 static void
372 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
373 {
374 	struct lwp *l = curlwp;
375 	struct proc *p = l->l_proc;
376 	struct trapframe *tf;
377 	struct linux_sigframe *fp, frame;
378 	int onstack;
379 	int sig = ksi->ksi_signo;
380 	sig_t catcher = SIGACTION(p, sig).sa_handler;
381 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
382 
383 	tf = l->l_md.md_regs;
384 
385 	/* Do we need to jump onto the signal stack? */
386 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
387 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
388 
389 	/* Allocate space for the signal handler context. */
390 	if (onstack)
391 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
392 		    sas->ss_size);
393 	else
394 		fp = (struct linux_sigframe *)tf->tf_esp;
395 	fp--;
396 
397 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
398 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
399 
400 	/* Build stack frame for signal trampoline. */
401 	frame.sf_handler = catcher;
402 	frame.sf_sig = native_to_linux_signo[sig];
403 
404 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
405 
406 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
407 		/*
408 		 * Process has trashed its stack; give it an illegal
409 		 * instruction to halt it in its tracks.
410 		 */
411 		sigexit(l, SIGILL);
412 		/* NOTREACHED */
413 	}
414 
415 	/*
416 	 * Build context to run handler in.
417 	 */
418 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
419 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
420 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
421 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
422 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
423 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
424 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
425 	tf->tf_esp = (int)fp;
426 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
427 
428 	/* Remember that we're now on the signal stack. */
429 	if (onstack)
430 		sas->ss_flags |= SS_ONSTACK;
431 }
432 
433 /*
434  * System call to cleanup state after a signal
435  * has been taken.  Reset signal mask and
436  * stack state from context left by sendsig (above).
437  * Return to previous pc and psl as specified by
438  * context left by sendsig. Check carefully to
439  * make sure that the user has not modified the
440  * psl to gain improper privileges or to cause
441  * a machine fault.
442  */
443 int
444 linux_sys_rt_sigreturn(l, v, retval)
445 	struct lwp *l;
446 	void *v;
447 	register_t *retval;
448 {
449 	struct linux_sys_rt_sigreturn_args /* {
450 		syscallarg(struct linux_ucontext *) ucp;
451 	} */ *uap = v;
452 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
453 	int error;
454 
455 	/*
456 	 * The trampoline code hands us the context.
457 	 * It is unsafe to keep track of it ourselves, in the event that a
458 	 * program jumps out of a signal handler.
459 	 */
460 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
461 		return error;
462 
463 	/* XXX XAX we can do better here by using more of the ucontext */
464 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
465 }
466 
467 int
468 linux_sys_sigreturn(l, v, retval)
469 	struct lwp *l;
470 	void *v;
471 	register_t *retval;
472 {
473 	struct linux_sys_sigreturn_args /* {
474 		syscallarg(struct linux_sigcontext *) scp;
475 	} */ *uap = v;
476 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
477 	int error;
478 
479 	/*
480 	 * The trampoline code hands us the context.
481 	 * It is unsafe to keep track of it ourselves, in the event that a
482 	 * program jumps out of a signal handler.
483 	 */
484 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
485 		return error;
486 	return linux_restore_sigcontext(l, &context, retval);
487 }
488 
489 static int
490 linux_restore_sigcontext(l, scp, retval)
491 	struct lwp *l;
492 	struct linux_sigcontext *scp;
493 	register_t *retval;
494 {
495 	struct proc *p = l->l_proc;
496 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
497 	struct trapframe *tf;
498 	sigset_t mask;
499 	ssize_t ss_gap;
500 	/* Restore register context. */
501 	tf = l->l_md.md_regs;
502 
503 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
504 #ifdef VM86
505 	if (scp->sc_eflags & PSL_VM) {
506 		void syscall_vm86 __P((struct trapframe *));
507 
508 		tf->tf_vm86_gs = scp->sc_gs;
509 		tf->tf_vm86_fs = scp->sc_fs;
510 		tf->tf_vm86_es = scp->sc_es;
511 		tf->tf_vm86_ds = scp->sc_ds;
512 		set_vflags(l, scp->sc_eflags);
513 		p->p_md.md_syscall = syscall_vm86;
514 	} else
515 #endif
516 	{
517 		/*
518 		 * Check for security violations.  If we're returning to
519 		 * protected mode, the CPU will validate the segment registers
520 		 * automatically and generate a trap on violations.  We handle
521 		 * the trap, rather than doing all of the checking here.
522 		 */
523 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
524 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
525 			return EINVAL;
526 
527 		tf->tf_gs = scp->sc_gs;
528 		tf->tf_fs = scp->sc_fs;
529 		tf->tf_es = scp->sc_es;
530 		tf->tf_ds = scp->sc_ds;
531 #ifdef VM86
532 		if (tf->tf_eflags & PSL_VM)
533 			(*p->p_emul->e_syscall_intern)(p);
534 #endif
535 		tf->tf_eflags = scp->sc_eflags;
536 	}
537 	tf->tf_edi = scp->sc_edi;
538 	tf->tf_esi = scp->sc_esi;
539 	tf->tf_ebp = scp->sc_ebp;
540 	tf->tf_ebx = scp->sc_ebx;
541 	tf->tf_edx = scp->sc_edx;
542 	tf->tf_ecx = scp->sc_ecx;
543 	tf->tf_eax = scp->sc_eax;
544 	tf->tf_eip = scp->sc_eip;
545 	tf->tf_cs = scp->sc_cs;
546 	tf->tf_esp = scp->sc_esp_at_signal;
547 	tf->tf_ss = scp->sc_ss;
548 
549 	/* Restore signal stack. */
550 	/*
551 	 * Linux really does it this way; it doesn't have space in sigframe
552 	 * to save the onstack flag.
553 	 */
554 	ss_gap = (ssize_t)
555 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
556 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
557 		sas->ss_flags |= SS_ONSTACK;
558 	else
559 		sas->ss_flags &= ~SS_ONSTACK;
560 
561 	/* Restore signal mask. */
562 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
563 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
564 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
565 	return EJUSTRETURN;
566 }
567 
568 #ifdef USER_LDT
569 
570 int
571 linux_read_ldt(l, uap, retval)
572 	struct lwp *l;
573 	struct linux_sys_modify_ldt_args /* {
574 		syscallarg(int) func;
575 		syscallarg(void *) ptr;
576 		syscallarg(size_t) bytecount;
577 	} */ *uap;
578 	register_t *retval;
579 {
580 	struct proc *p = l->l_proc;
581 	struct i386_get_ldt_args gl;
582 	int error;
583 	caddr_t sg;
584 	char *parms;
585 
586 	DPRINTF(("linux_read_ldt!"));
587 	sg = stackgap_init(p, 0);
588 
589 	gl.start = 0;
590 	gl.desc = SCARG(uap, ptr);
591 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
592 
593 	parms = stackgap_alloc(p, &sg, sizeof(gl));
594 
595 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
596 		return (error);
597 
598 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
599 		return (error);
600 
601 	*retval *= sizeof(union descriptor);
602 	return (0);
603 }
604 
/*
 * Argument block copied in from user space by linux_write_ldt().  The
 * layout must match what the caller passes: the request is rejected
 * unless its byte count equals sizeof(struct linux_ldt_info).
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to write; must be < 8192 */
	u_long base_addr;		/* segment base (split into lo/hi base) */
	u_int limit;			/* segment limit (low 20 bits are used) */
	u_int seg_32bit:1;		/* copied to the descriptor's def32 bit */
	u_int contents:2;		/* becomes bits 2-3 of the descriptor type */
	u_int read_exec_only:1;		/* inverted into bit 1 of the type */
	u_int limit_in_pages:1;		/* copied to the granularity bit */
	u_int seg_not_present:1;	/* inverted into the present bit */
	u_int useable:1;		/* copied to sd_xx, new mode only */
};
616 
617 int
618 linux_write_ldt(l, uap, retval)
619 	struct lwp *l;
620 	struct linux_sys_modify_ldt_args /* {
621 		syscallarg(int) func;
622 		syscallarg(void *) ptr;
623 		syscallarg(size_t) bytecount;
624 	} */ *uap;
625 	register_t *retval;
626 {
627 	struct proc *p = l->l_proc;
628 	struct linux_ldt_info ldt_info;
629 	struct segment_descriptor sd;
630 	struct i386_set_ldt_args sl;
631 	int error;
632 	caddr_t sg;
633 	char *parms;
634 	int oldmode = (int)retval[0];
635 
636 	DPRINTF(("linux_write_ldt %d\n", oldmode));
637 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
638 		return (EINVAL);
639 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
640 		return error;
641 	if (ldt_info.entry_number >= 8192)
642 		return (EINVAL);
643 	if (ldt_info.contents == 3) {
644 		if (oldmode)
645 			return (EINVAL);
646 		if (ldt_info.seg_not_present)
647 			return (EINVAL);
648 	}
649 
650 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
651 	    (oldmode || (ldt_info.contents == 0 &&
652 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
653 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
654 	    ldt_info.useable == 0))) {
655 		/* this means you should zero the ldt */
656 		(void)memset(&sd, 0, sizeof(sd));
657 	} else {
658 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
659 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
660 		sd.sd_lolimit = ldt_info.limit & 0xffff;
661 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
662 		sd.sd_type = 16 | (ldt_info.contents << 2) |
663 		    (!ldt_info.read_exec_only << 1);
664 		sd.sd_dpl = SEL_UPL;
665 		sd.sd_p = !ldt_info.seg_not_present;
666 		sd.sd_def32 = ldt_info.seg_32bit;
667 		sd.sd_gran = ldt_info.limit_in_pages;
668 		if (!oldmode)
669 			sd.sd_xx = ldt_info.useable;
670 		else
671 			sd.sd_xx = 0;
672 	}
673 	sg = stackgap_init(p, 0);
674 	sl.start = ldt_info.entry_number;
675 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
676 	sl.num = 1;
677 
678 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
679 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
680 
681 	parms = stackgap_alloc(p, &sg, sizeof(sl));
682 
683 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
684 		return (error);
685 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
686 		return (error);
687 
688 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
689 		return (error);
690 
691 	*retval = 0;
692 	return (0);
693 }
694 
695 #endif /* USER_LDT */
696 
697 int
698 linux_sys_modify_ldt(l, v, retval)
699 	struct lwp *l;
700 	void *v;
701 	register_t *retval;
702 {
703 	struct linux_sys_modify_ldt_args /* {
704 		syscallarg(int) func;
705 		syscallarg(void *) ptr;
706 		syscallarg(size_t) bytecount;
707 	} */ *uap = v;
708 
709 	switch (SCARG(uap, func)) {
710 #ifdef USER_LDT
711 	case 0:
712 		return linux_read_ldt(l, uap, retval);
713 	case 1:
714 		retval[0] = 1;
715 		return linux_write_ldt(l, uap, retval);
716 	case 2:
717 #ifdef notyet
718 		return (linux_read_default_ldt(l, uap, retval);
719 #else
720 		return (ENOSYS);
721 #endif
722 	case 0x11:
723 		retval[0] = 0;
724 		return linux_write_ldt(l, uap, retval);
725 #endif /* USER_LDT */
726 
727 	default:
728 		return (ENOSYS);
729 	}
730 }
731 
732 /*
733  * XXX Pathetic hack to make svgalib work. This will fake the major
734  * device number of an opened VT so that svgalib likes it. grmbl.
735  * Should probably do it 'wrong the right way' and use a mapping
736  * array for all major device numbers, and map linux_mknod too.
737  */
/*
 * Translate a device number for presentation to a Linux program.
 *
 * When `raw' is non-zero and `dev' is served by the wsdisplay character
 * device switch, the result uses the Linux console major and a minor
 * one higher than the native one.  Every other device number is
 * returned unchanged.
 */
dev_t
linux_fakedev(dev_t dev, int raw)
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	{
		extern const struct cdevsw wsdisplay_cdevsw;

		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
	}
#endif

	return dev;
}
753 
754 #if (NWSDISPLAY > 0)
755 /*
756  * That's not complete, but enough to get an X server running.
757  */
/* Number of entries in each of the keymap tables below. */
#define NR_KEYS 128
/*
 * Fixed keymaps returned by the LINUX_KDGKBENT ioctl, one table per
 * modifier state.  Each table is indexed by the kb_index supplied by
 * the caller and yields the 16-bit kb_value handed back.
 * NOTE(review): the values look like Linux keysyms (type in the high
 * byte, value in the low byte) -- confirm against the Linux keymap
 * sources.
 */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table of tables, indexed by the kb_table value of a LINUX_KDGKBENT
 * request.  Slots 2 and 3 both refer to altgr_map.
 * NOTE(review): slot 3 presumably stands in for a shift+altgr map that
 * is not provided here -- confirm.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
832 #endif
833 
834 static struct biosdisk_info *
835 fd2biosinfo(p, fp)
836 	struct proc *p;
837 	struct file *fp;
838 {
839 	struct vnode *vp;
840 	const char *blkname;
841 	char diskname[16];
842 	int i;
843 	struct nativedisk_info *nip;
844 	struct disklist *dl = i386_alldisks;
845 
846 	if (fp->f_type != DTYPE_VNODE)
847 		return NULL;
848 	vp = (struct vnode *)fp->f_data;
849 
850 	if (vp->v_type != VBLK)
851 		return NULL;
852 
853 	blkname = devsw_blk2name(major(vp->v_rdev));
854 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
855 	    DISKUNIT(vp->v_rdev));
856 
857 	for (i = 0; i < dl->dl_nnativedisks; i++) {
858 		nip = &dl->dl_nativedisks[i];
859 		if (strcmp(diskname, nip->ni_devname))
860 			continue;
861 		if (nip->ni_nmatches != 0)
862 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
863 	}
864 
865 	return NULL;
866 }
867 
868 
869 /*
870  * We come here in a last attempt to satisfy a Linux ioctl() call
871  */
872 int
873 linux_machdepioctl(p, v, retval)
874 	struct proc *p;
875 	void *v;
876 	register_t *retval;
877 {
878 	struct linux_sys_ioctl_args /* {
879 		syscallarg(int) fd;
880 		syscallarg(u_long) com;
881 		syscallarg(caddr_t) data;
882 	} */ *uap = v;
883 	struct sys_ioctl_args bia;
884 	u_long com;
885 	int error, error1;
886 #if (NWSDISPLAY > 0)
887 	struct vt_mode lvt;
888 	caddr_t bvtp, sg;
889 	struct kbentry kbe;
890 #endif
891 	struct linux_hd_geometry hdg;
892 	struct linux_hd_big_geometry hdg_big;
893 	struct biosdisk_info *bip;
894 	struct filedesc *fdp;
895 	struct file *fp;
896 	int fd;
897 	struct disklabel label, *labp;
898 	struct partinfo partp;
899 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
900 	u_long start, biostotal, realtotal;
901 	u_char heads, sectors;
902 	u_int cylinders;
903 	struct ioctl_pt pt;
904 
905 	fd = SCARG(uap, fd);
906 	SCARG(&bia, fd) = fd;
907 	SCARG(&bia, data) = SCARG(uap, data);
908 	com = SCARG(uap, com);
909 
910 	fdp = p->p_fd;
911 
912 	if ((fp = fd_getfile(fdp, fd)) == NULL)
913 		return (EBADF);
914 
915 	FILE_USE(fp);
916 
917 	switch (com) {
918 #if (NWSDISPLAY > 0)
919 	case LINUX_KDGKBMODE:
920 		com = KDGKBMODE;
921 		break;
922 	case LINUX_KDSKBMODE:
923 		com = KDSKBMODE;
924 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
925 			SCARG(&bia, data) = (caddr_t)K_RAW;
926 		break;
927 	case LINUX_KIOCSOUND:
928 		SCARG(&bia, data) =
929 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
930 		/* fall through */
931 	case LINUX_KDMKTONE:
932 		com = KDMKTONE;
933 		break;
934 	case LINUX_KDSETMODE:
935 		com = KDSETMODE;
936 		break;
937 	case LINUX_KDGETMODE:
938 		/* KD_* values are equal to the wscons numbers */
939 		com = WSDISPLAYIO_GMODE;
940 		break;
941 	case LINUX_KDENABIO:
942 		com = KDENABIO;
943 		break;
944 	case LINUX_KDDISABIO:
945 		com = KDDISABIO;
946 		break;
947 	case LINUX_KDGETLED:
948 		com = KDGETLED;
949 		break;
950 	case LINUX_KDSETLED:
951 		com = KDSETLED;
952 		break;
953 	case LINUX_VT_OPENQRY:
954 		com = VT_OPENQRY;
955 		break;
956 	case LINUX_VT_GETMODE:
957 		SCARG(&bia, com) = VT_GETMODE;
958 		/* XXX NJWLWP */
959 		if ((error = sys_ioctl(curlwp, &bia, retval)))
960 			goto out;
961 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
962 		    sizeof (struct vt_mode))))
963 			goto out;
964 		lvt.relsig = native_to_linux_signo[lvt.relsig];
965 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
966 		lvt.frsig = native_to_linux_signo[lvt.frsig];
967 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
968 		    sizeof (struct vt_mode));
969 		goto out;
970 	case LINUX_VT_SETMODE:
971 		com = VT_SETMODE;
972 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
973 		    sizeof (struct vt_mode))))
974 			goto out;
975 		lvt.relsig = linux_to_native_signo[lvt.relsig];
976 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
977 		lvt.frsig = linux_to_native_signo[lvt.frsig];
978 		sg = stackgap_init(p, 0);
979 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
980 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
981 			goto out;
982 		SCARG(&bia, data) = bvtp;
983 		break;
984 	case LINUX_VT_DISALLOCATE:
985 		/* XXX should use WSDISPLAYIO_DELSCREEN */
986 		error = 0;
987 		goto out;
988 	case LINUX_VT_RELDISP:
989 		com = VT_RELDISP;
990 		break;
991 	case LINUX_VT_ACTIVATE:
992 		com = VT_ACTIVATE;
993 		break;
994 	case LINUX_VT_WAITACTIVE:
995 		com = VT_WAITACTIVE;
996 		break;
997 	case LINUX_VT_GETSTATE:
998 		com = VT_GETSTATE;
999 		break;
1000 	case LINUX_KDGKBTYPE:
1001 	    {
1002 		static const u_int8_t kb101 = KB_101;
1003 
1004 		/* This is what Linux does. */
1005 		error = copyout(&kb101, SCARG(uap, data), 1);
1006 		goto out;
1007 	    }
1008 	case LINUX_KDGKBENT:
1009 		/*
1010 		 * The Linux KDGKBENT ioctl is different from the
1011 		 * SYSV original. So we handle it in machdep code.
1012 		 * XXX We should use keyboard mapping information
1013 		 * from wsdisplay, but this would be expensive.
1014 		 */
1015 		if ((error = copyin(SCARG(uap, data), &kbe,
1016 				    sizeof(struct kbentry))))
1017 			goto out;
1018 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
1019 		    || kbe.kb_index >= NR_KEYS) {
1020 			error = EINVAL;
1021 			goto out;
1022 		}
1023 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
1024 		error = copyout(&kbe, SCARG(uap, data),
1025 				sizeof(struct kbentry));
1026 		goto out;
1027 #endif
1028 	case LINUX_HDIO_GETGEO:
1029 	case LINUX_HDIO_GETGEO_BIG:
1030 		/*
1031 		 * Try to mimic Linux behaviour: return the BIOS geometry
1032 		 * if possible (extending its # of cylinders if it's beyond
1033 		 * the 1023 limit), fall back to the MI geometry (i.e.
1034 		 * the real geometry) if not found, by returning an
1035 		 * error. See common/linux_hdio.c
1036 		 */
1037 		bip = fd2biosinfo(p, fp);
1038 		ioctlf = fp->f_ops->fo_ioctl;
1039 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
1040 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
1041 		if (error != 0 && error1 != 0) {
1042 			error = error1;
1043 			goto out;
1044 		}
1045 		labp = error != 0 ? &label : partp.disklab;
1046 		start = error1 != 0 ? partp.part->p_offset : 0;
1047 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
1048 		    && bip->bi_cyl != 0) {
1049 			heads = bip->bi_head;
1050 			sectors = bip->bi_sec;
1051 			cylinders = bip->bi_cyl;
1052 			biostotal = heads * sectors * cylinders;
1053 			realtotal = labp->d_ntracks * labp->d_nsectors *
1054 			    labp->d_ncylinders;
1055 			if (realtotal > biostotal)
1056 				cylinders = realtotal / (heads * sectors);
1057 		} else {
1058 			heads = labp->d_ntracks;
1059 			cylinders = labp->d_ncylinders;
1060 			sectors = labp->d_nsectors;
1061 		}
1062 		if (com == LINUX_HDIO_GETGEO) {
1063 			hdg.start = start;
1064 			hdg.heads = heads;
1065 			hdg.cylinders = cylinders;
1066 			hdg.sectors = sectors;
1067 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1068 			goto out;
1069 		} else {
1070 			hdg_big.start = start;
1071 			hdg_big.heads = heads;
1072 			hdg_big.cylinders = cylinders;
1073 			hdg_big.sectors = sectors;
1074 			error = copyout(&hdg_big, SCARG(uap, data),
1075 			    sizeof hdg_big);
1076 			goto out;
1077 		}
1078 
1079 	default:
1080 		/*
1081 		 * Unknown to us. If it's on a device, just pass it through
1082 		 * using PTIOCLINUX, the device itself might be able to
1083 		 * make some sense of it.
1084 		 * XXX hack: if the function returns EJUSTRETURN,
1085 		 * it has stuffed a sysctl return value in pt.data.
1086 		 */
1087 		FILE_USE(fp);
1088 		ioctlf = fp->f_ops->fo_ioctl;
1089 		pt.com = SCARG(uap, com);
1090 		pt.data = SCARG(uap, data);
1091 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
1092 		FILE_UNUSE(fp, p);
1093 		if (error == EJUSTRETURN) {
1094 			retval[0] = (register_t)pt.data;
1095 			error = 0;
1096 		}
1097 
1098 		if (error == ENOTTY)
1099 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1100 			    com));
1101 		goto out;
1102 	}
1103 	SCARG(&bia, com) = com;
1104 	/* XXX NJWLWP */
1105 	error = sys_ioctl(curlwp, &bia, retval);
1106 out:
1107 	FILE_UNUSE(fp ,p);
1108 	return error;
1109 }
1110 
1111 /*
1112  * Set I/O permissions for a process. Just set the maximum level
1113  * right away (ignoring the argument), otherwise we would have
1114  * to rely on I/O permission maps, which are not implemented.
1115  */
1116 int
1117 linux_sys_iopl(l, v, retval)
1118 	struct lwp *l;
1119 	void *v;
1120 	register_t *retval;
1121 {
1122 #if 0
1123 	struct linux_sys_iopl_args /* {
1124 		syscallarg(int) level;
1125 	} */ *uap = v;
1126 #endif
1127 	struct proc *p = l->l_proc;
1128 	struct trapframe *fp = l->l_md.md_regs;
1129 
1130 	if (suser(p->p_ucred, &p->p_acflag) != 0)
1131 		return EPERM;
1132 	fp->tf_eflags |= PSL_IOPL;
1133 	*retval = 0;
1134 	return 0;
1135 }
1136 
1137 /*
1138  * See above. If a root process tries to set access to an I/O port,
1139  * just let it have the whole range.
1140  */
1141 int
1142 linux_sys_ioperm(l, v, retval)
1143 	struct lwp *l;
1144 	void *v;
1145 	register_t *retval;
1146 {
1147 	struct linux_sys_ioperm_args /* {
1148 		syscallarg(unsigned int) lo;
1149 		syscallarg(unsigned int) hi;
1150 		syscallarg(int) val;
1151 	} */ *uap = v;
1152 	struct proc *p = l->l_proc;
1153 	struct trapframe *fp = l->l_md.md_regs;
1154 
1155 	if (suser(p->p_ucred, &p->p_acflag) != 0)
1156 		return EPERM;
1157 	if (SCARG(uap, val))
1158 		fp->tf_eflags |= PSL_IOPL;
1159 	*retval = 0;
1160 	return 0;
1161 }
1162 
1163 int
1164 linux_exec_setup_stack(struct proc *p, struct exec_package *epp)
1165 {
1166 	u_long max_stack_size;
1167 	u_long access_linear_min, access_size;
1168 	u_long noaccess_linear_min, noaccess_size;
1169 
1170 #ifndef	USRSTACK32
1171 #define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
1172 #endif
1173 
1174 	if (epp->ep_flags & EXEC_32) {
1175 		epp->ep_minsaddr = USRSTACK32;
1176 		max_stack_size = MAXSSIZ;
1177 	} else {
1178 		epp->ep_minsaddr = USRSTACK;
1179 		max_stack_size = MAXSSIZ;
1180 	}
1181 
1182 	if (epp->ep_minsaddr > LINUX_USRSTACK)
1183 		epp->ep_minsaddr = LINUX_USRSTACK;
1184 #ifdef DEBUG_LINUX
1185 	else {
1186 		/*
1187 		 * Someone needs to make KERNBASE and TEXTADDR
1188 		 * java versions < 1.4.2 need the stack to be
1189 		 * at 0xC0000000
1190 		 */
1191 		uprintf("Cannot setup stack to 0xC0000000, "
1192 		    "java will not work properly\n");
1193 	}
1194 #endif
1195 	epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
1196 		max_stack_size);
1197 	epp->ep_ssize = p->p_rlimit[RLIMIT_STACK].rlim_cur;
1198 
1199 	/*
1200 	 * set up commands for stack.  note that this takes *two*, one to
1201 	 * map the part of the stack which we can access, and one to map
1202 	 * the part which we can't.
1203 	 *
1204 	 * arguably, it could be made into one, but that would require the
1205 	 * addition of another mapping proc, which is unnecessary
1206 	 */
1207 	access_size = epp->ep_ssize;
1208 	access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
1209 	noaccess_size = max_stack_size - access_size;
1210 	noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
1211 	    access_size), noaccess_size);
1212 	if (noaccess_size > 0) {
1213 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
1214 		    noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
1215 	}
1216 	KASSERT(access_size > 0);
1217 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
1218 	    access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE);
1219 
1220 	return 0;
1221 }
1222