xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 28c37e673e4d9b6cbdc7483062b915cc61d1ccf5)
1 /*	$NetBSD: linux_machdep.c,v 1.80 2002/09/25 22:21:34 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.80 2002/09/25 22:21:34 thorpej Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/syscallargs.h>
66 #include <sys/filedesc.h>
67 #include <sys/exec_elf.h>
68 #include <sys/disklabel.h>
69 #include <sys/ioctl.h>
70 #include <miscfs/specfs/specdev.h>
71 
72 #include <compat/linux/common/linux_types.h>
73 #include <compat/linux/common/linux_signal.h>
74 #include <compat/linux/common/linux_util.h>
75 #include <compat/linux/common/linux_ioctl.h>
76 #include <compat/linux/common/linux_hdio.h>
77 #include <compat/linux/common/linux_exec.h>
78 #include <compat/linux/common/linux_machdep.h>
79 
80 #include <compat/linux/linux_syscallargs.h>
81 
82 #include <machine/cpu.h>
83 #include <machine/cpufunc.h>
84 #include <machine/psl.h>
85 #include <machine/reg.h>
86 #include <machine/segments.h>
87 #include <machine/specialreg.h>
88 #include <machine/sysarch.h>
89 #include <machine/vm86.h>
90 #include <machine/vmparam.h>
91 
92 /*
93  * To see whether wscons is configured (for virtual console ioctl calls).
94  */
95 #if defined(_KERNEL_OPT)
96 #include "wsdisplay.h"
97 #endif
98 #if (NWSDISPLAY > 0)
99 #include <dev/wscons/wsconsio.h>
100 #include <dev/wscons/wsdisplay_usl_io.h>
101 #if defined(_KERNEL_OPT)
102 #include "opt_xserver.h"
103 #endif
104 #endif
105 
106 #ifdef USER_LDT
107 #include <machine/cpu.h>
108 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
109     register_t *));
110 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
111     register_t *));
112 #endif
113 
/*
 * Debug tracing helper.  Callers pass one fully parenthesised argument
 * list, e.g. DPRINTF(("fmt %d\n", x)).  With DEBUG_LINUX defined that
 * list is handed to uprintf; otherwise the whole statement expands to
 * nothing.
 */
114 #ifdef DEBUG_LINUX
115 #define DPRINTF(a) uprintf a
116 #else
117 #define DPRINTF(a)
118 #endif
119 
120 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
121 extern struct disklist *i386_alldisks;
122 
123 /*
124  * Deal with some i386-specific things in the Linux emulation code.
125  */
126 
127 void
128 linux_setregs(p, epp, stack)
129 	struct proc *p;
130 	struct exec_package *epp;
131 	u_long stack;
132 {
133 	struct pcb *pcb = &p->p_addr->u_pcb;
134 	struct trapframe *tf;
135 
136 #if NNPX > 0
137 	/* If we were using the FPU, forget about it. */
138 	if (npxproc == p)
139 		npxdrop();
140 #endif
141 
142 #ifdef USER_LDT
143 	pmap_ldt_cleanup(p);
144 #endif
145 
146 	p->p_md.md_flags &= ~MDP_USEDFPU;
147 	pcb->pcb_flags = 0;
148 
149 	if (i386_use_fxsave) {
150 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
152 	} else
153 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
154 
155 	tf = p->p_md.md_regs;
156 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
157 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
158 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
159 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
160 	tf->tf_edi = 0;
161 	tf->tf_esi = 0;
162 	tf->tf_ebp = 0;
163 	tf->tf_ebx = (int)p->p_psstr;
164 	tf->tf_edx = 0;
165 	tf->tf_ecx = 0;
166 	tf->tf_eax = 0;
167 	tf->tf_eip = epp->ep_entry;
168 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
169 	tf->tf_eflags = PSL_USERSET;
170 	tf->tf_esp = stack;
171 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
172 }
173 
174 /*
175  * Send an interrupt to process.
176  *
177  * Stack is set up to allow sigcode stored
178  * in u. to call routine, followed by kcall
179  * to sigreturn routine below.  After sigreturn
180  * resets the signal mask, the stack, and the
181  * frame pointer, it returns to the user
182  * specified pc, psl.
183  */
184 
185 void
186 linux_sendsig(sig, mask, code)
187 	int sig;
188 	sigset_t *mask;
189 	u_long code;
190 {
191 	struct proc *p = curproc;
192 	struct trapframe *tf;
193 	struct linux_sigframe *fp, frame;
194 	int onstack;
195 	sig_t catcher = SIGACTION(p, sig).sa_handler;
196 
197 	tf = p->p_md.md_regs;
198 
199 	/* Do we need to jump onto the signal stack? */
200 	onstack =
201 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
202 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
203 
204 	/* Allocate space for the signal handler context. */
205 	if (onstack)
206 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
207 					  p->p_sigctx.ps_sigstk.ss_size);
208 	else
209 		fp = (struct linux_sigframe *)tf->tf_esp;
210 	fp--;
211 
212 	/* Build stack frame for signal trampoline. */
213 	frame.sf_handler = catcher;
214 	frame.sf_sig = native_to_linux_signo[sig];
215 
216 	/* Save register context. */
217 #ifdef VM86
218 	if (tf->tf_eflags & PSL_VM) {
219 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
220 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
221 		frame.sf_sc.sc_es = tf->tf_vm86_es;
222 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
223 		frame.sf_sc.sc_eflags = get_vflags(p);
224 	} else
225 #endif
226 	{
227 		frame.sf_sc.sc_gs = tf->tf_gs;
228 		frame.sf_sc.sc_fs = tf->tf_fs;
229 		frame.sf_sc.sc_es = tf->tf_es;
230 		frame.sf_sc.sc_ds = tf->tf_ds;
231 		frame.sf_sc.sc_eflags = tf->tf_eflags;
232 	}
233 	frame.sf_sc.sc_edi = tf->tf_edi;
234 	frame.sf_sc.sc_esi = tf->tf_esi;
235 	frame.sf_sc.sc_ebp = tf->tf_ebp;
236 	frame.sf_sc.sc_ebx = tf->tf_ebx;
237 	frame.sf_sc.sc_edx = tf->tf_edx;
238 	frame.sf_sc.sc_ecx = tf->tf_ecx;
239 	frame.sf_sc.sc_eax = tf->tf_eax;
240 	frame.sf_sc.sc_eip = tf->tf_eip;
241 	frame.sf_sc.sc_cs = tf->tf_cs;
242 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
243 	frame.sf_sc.sc_ss = tf->tf_ss;
244 	frame.sf_sc.sc_err = tf->tf_err;
245 	frame.sf_sc.sc_trapno = tf->tf_trapno;
246 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
247 
248 	/* Save signal stack. */
249 	/* Linux doesn't save the onstack flag in sigframe */
250 
251 	/* Save signal mask. */
252 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
253 
254 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
255 		/*
256 		 * Process has trashed its stack; give it an illegal
257 		 * instruction to halt it in its tracks.
258 		 */
259 		sigexit(p, SIGILL);
260 		/* NOTREACHED */
261 	}
262 
263 	/*
264 	 * Build context to run handler in.
265 	 */
266 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
267 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
268 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
269 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
270 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
271 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
272 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
273 	tf->tf_esp = (int)fp;
274 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
275 
276 	/* Remember that we're now on the signal stack. */
277 	if (onstack)
278 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
279 }
280 
281 /*
282  * System call to cleanup state after a signal
283  * has been taken.  Reset signal mask and
284  * stack state from context left by sendsig (above).
285  * Return to previous pc and psl as specified by
286  * context left by sendsig. Check carefully to
287  * make sure that the user has not modified the
288  * psl to gain improper privileges or to cause
289  * a machine fault.
290  */
/*
 * Placeholder for the RT variant of sigreturn: nothing is implemented
 * yet, so every call fails with ENOSYS and no argument is examined.
 */
int
linux_sys_rt_sigreturn(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{

	/* XXX XAX write me */
	return ENOSYS;
}
300 
301 int
302 linux_sys_sigreturn(p, v, retval)
303 	struct proc *p;
304 	void *v;
305 	register_t *retval;
306 {
307 	struct linux_sys_sigreturn_args /* {
308 		syscallarg(struct linux_sigcontext *) scp;
309 	} */ *uap = v;
310 	struct linux_sigcontext *scp, context;
311 	struct trapframe *tf;
312 	sigset_t mask;
313 	ssize_t ss_gap;
314 
315 	/*
316 	 * The trampoline code hands us the context.
317 	 * It is unsafe to keep track of it ourselves, in the event that a
318 	 * program jumps out of a signal handler.
319 	 */
320 	scp = SCARG(uap, scp);
321 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
322 		return (EFAULT);
323 
324 	/* Restore register context. */
325 	tf = p->p_md.md_regs;
326 #ifdef VM86
327 	if (context.sc_eflags & PSL_VM) {
328 		tf->tf_vm86_gs = context.sc_gs;
329 		tf->tf_vm86_fs = context.sc_fs;
330 		tf->tf_vm86_es = context.sc_es;
331 		tf->tf_vm86_ds = context.sc_ds;
332 		set_vflags(p, context.sc_eflags);
333 	} else
334 #endif
335 	{
336 		/*
337 		 * Check for security violations.  If we're returning to
338 		 * protected mode, the CPU will validate the segment registers
339 		 * automatically and generate a trap on violations.  We handle
340 		 * the trap, rather than doing all of the checking here.
341 		 */
342 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
343 		    !USERMODE(context.sc_cs, context.sc_eflags))
344 			return (EINVAL);
345 
346 		tf->tf_gs = context.sc_gs;
347 		tf->tf_fs = context.sc_fs;
348 		tf->tf_es = context.sc_es;
349 		tf->tf_ds = context.sc_ds;
350 		tf->tf_eflags = context.sc_eflags;
351 	}
352 	tf->tf_edi = context.sc_edi;
353 	tf->tf_esi = context.sc_esi;
354 	tf->tf_ebp = context.sc_ebp;
355 	tf->tf_ebx = context.sc_ebx;
356 	tf->tf_edx = context.sc_edx;
357 	tf->tf_ecx = context.sc_ecx;
358 	tf->tf_eax = context.sc_eax;
359 	tf->tf_eip = context.sc_eip;
360 	tf->tf_cs = context.sc_cs;
361 	tf->tf_esp = context.sc_esp_at_signal;
362 	tf->tf_ss = context.sc_ss;
363 
364 	/* Restore signal stack. */
365 	/*
366 	 * Linux really does it this way; it doesn't have space in sigframe
367 	 * to save the onstack flag.
368 	 */
369 	ss_gap = (ssize_t)
370 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
371 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
372 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
373 	else
374 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
375 
376 	/* Restore signal mask. */
377 	linux_old_to_native_sigset(&mask, &context.sc_mask);
378 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
379 
380 	return (EJUSTRETURN);
381 }
382 
383 #ifdef USER_LDT
384 
385 int
386 linux_read_ldt(p, uap, retval)
387 	struct proc *p;
388 	struct linux_sys_modify_ldt_args /* {
389 		syscallarg(int) func;
390 		syscallarg(void *) ptr;
391 		syscallarg(size_t) bytecount;
392 	} */ *uap;
393 	register_t *retval;
394 {
395 	struct i386_get_ldt_args gl;
396 	int error;
397 	caddr_t sg;
398 	char *parms;
399 
400 	DPRINTF(("linux_read_ldt!"));
401 	sg = stackgap_init(p, 0);
402 
403 	gl.start = 0;
404 	gl.desc = SCARG(uap, ptr);
405 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
406 
407 	parms = stackgap_alloc(p, &sg, sizeof(gl));
408 
409 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
410 		return (error);
411 
412 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
413 		return (error);
414 
415 	*retval *= sizeof(union descriptor);
416 	return (0);
417 }
418 
/*
 * Descriptor request that linux_write_ldt() copies in from the user
 * pointer; the caller's bytecount must equal sizeof this structure.
 * The per-field notes describe how linux_write_ldt() uses each member.
 * NOTE(review): presumably mirrors the Linux modify_ldt argument
 * layout -- confirm against the Linux headers before changing it.
 */
419 struct linux_ldt_info {
420 	u_int entry_number;	/* LDT slot to write; rejected if >= 8192 */
421 	u_long base_addr;	/* segment base, split into sd_lobase/sd_hibase */
422 	u_int limit;	/* segment limit, split into sd_lolimit/sd_hilimit */
423 	u_int seg_32bit:1;	/* copied to sd_def32 */
424 	u_int contents:2;	/* shifted into sd_type; 3 is restricted */
425 	u_int read_exec_only:1;	/* inverted into bit 1 of sd_type */
426 	u_int limit_in_pages:1;	/* copied to sd_gran */
427 	u_int seg_not_present:1;	/* inverted into sd_p */
428 	u_int useable:1;	/* copied to sd_xx unless in old mode */
429 };
430 
431 int
432 linux_write_ldt(p, uap, retval)
433 	struct proc *p;
434 	struct linux_sys_modify_ldt_args /* {
435 		syscallarg(int) func;
436 		syscallarg(void *) ptr;
437 		syscallarg(size_t) bytecount;
438 	} */ *uap;
439 	register_t *retval;
440 {
441 	struct linux_ldt_info ldt_info;
442 	struct segment_descriptor sd;
443 	struct i386_set_ldt_args sl;
444 	int error;
445 	caddr_t sg;
446 	char *parms;
447 	int oldmode = (int)retval[0];
448 
449 	DPRINTF(("linux_write_ldt %d\n", oldmode));
450 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
451 		return (EINVAL);
452 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
453 		return error;
454 	if (ldt_info.entry_number >= 8192)
455 		return (EINVAL);
456 	if (ldt_info.contents == 3) {
457 		if (oldmode)
458 			return (EINVAL);
459 		if (ldt_info.seg_not_present)
460 			return (EINVAL);
461 	}
462 
463 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
464 	    (oldmode || (ldt_info.contents == 0 &&
465 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
466 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
467 	    ldt_info.useable == 0))) {
468 		/* this means you should zero the ldt */
469 		(void)memset(&sd, 0, sizeof(sd));
470 	} else {
471 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
472 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
473 		sd.sd_lolimit = ldt_info.limit & 0xffff;
474 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
475 		sd.sd_type = 16 | (ldt_info.contents << 2) |
476 		    (!ldt_info.read_exec_only << 1);
477 		sd.sd_dpl = SEL_UPL;
478 		sd.sd_p = !ldt_info.seg_not_present;
479 		sd.sd_def32 = ldt_info.seg_32bit;
480 		sd.sd_gran = ldt_info.limit_in_pages;
481 		if (!oldmode)
482 			sd.sd_xx = ldt_info.useable;
483 		else
484 			sd.sd_xx = 0;
485 	}
486 	sg = stackgap_init(p, 0);
487 	sl.start = ldt_info.entry_number;
488 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
489 	sl.num = 1;
490 
491 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
492 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
493 
494 	parms = stackgap_alloc(p, &sg, sizeof(sl));
495 
496 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
497 		return (error);
498 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
499 		return (error);
500 
501 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
502 		return (error);
503 
504 	*retval = 0;
505 	return (0);
506 }
507 
508 #endif /* USER_LDT */
509 
510 int
511 linux_sys_modify_ldt(p, v, retval)
512 	struct proc *p;
513 	void *v;
514 	register_t *retval;
515 {
516 	struct linux_sys_modify_ldt_args /* {
517 		syscallarg(int) func;
518 		syscallarg(void *) ptr;
519 		syscallarg(size_t) bytecount;
520 	} */ *uap = v;
521 
522 	switch (SCARG(uap, func)) {
523 #ifdef USER_LDT
524 	case 0:
525 		return linux_read_ldt(p, uap, retval);
526 	case 1:
527 		retval[0] = 1;
528 		return linux_write_ldt(p, uap, retval);
529 	case 2:
530 #ifdef notyet
531 		return (linux_read_default_ldt(p, uap, retval);
532 #else
533 		return (ENOSYS);
534 #endif
535 	case 0x11:
536 		retval[0] = 0;
537 		return linux_write_ldt(p, uap, retval);
538 #endif /* USER_LDT */
539 
540 	default:
541 		return (ENOSYS);
542 	}
543 }
544 
/*
 * XXX Hack for svgalib: when `raw' is set and `dev' is served by the
 * wsdisplay character driver, report it under the Linux console major
 * with the minor number bumped by one, so that svgalib accepts an
 * opened VT.  Every other device number is returned untouched.
 * A cleaner approach would be a mapping table covering all major
 * numbers, applied to linux_mknod as well.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
#if (NWSDISPLAY > 0)
	extern const struct cdevsw wsdisplay_cdevsw;

	if (raw && cdevsw_lookup(dev) == &wsdisplay_cdevsw)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif

	return dev;
}
566 
567 #if (NWSDISPLAY > 0)
568 /*
569  * That's not complete, but enough to get an X server running.
570  */
/*
 * Fixed keyboard translation tables returned by the LINUX_KDGKBENT
 * ioctl in linux_machdepioctl(): that handler picks a table with
 * kb_table and an entry with kb_index (which must be < NR_KEYS).
 * NOTE(review): the values look like Linux keysym codes (type in the
 * high byte, value in the low byte) -- confirm against the Linux
 * keymap definitions before editing any entry.
 */
571 #define NR_KEYS 128
572 static const u_short plain_map[NR_KEYS] = {
573 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
574 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
575 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
576 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
577 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
578 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
579 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
580 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
581 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
582 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
583 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
584 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
585 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
586 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
587 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
588 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
589 }, shift_map[NR_KEYS] = {	/* linux_keytabs[1] */
590 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
591 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
592 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
593 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
594 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
595 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
596 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
597 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
598 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
599 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
600 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
601 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
602 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
603 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
604 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
605 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
606 }, altgr_map[NR_KEYS] = {	/* linux_keytabs[2] and [3] */
607 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
608 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
609 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
610 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
611 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
612 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
613 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
614 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
615 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
616 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
617 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
618 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
619 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
620 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
621 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
622 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
623 }, ctrl_map[NR_KEYS] = {	/* linux_keytabs[4] */
624 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
625 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
626 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
627 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
628 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
629 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
630 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
631 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
632 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
633 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
634 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
635 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
636 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
637 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
638 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
639 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
640 };
641 
/*
 * Table of tables, indexed by kb_table.  Slot 3 deliberately repeats
 * altgr_map, so slots 2 and 3 return the same data.
 * NOTE(review): slot 3 is presumably the Linux shift+altgr table, for
 * which no separate map is provided here -- confirm.
 */
642 const u_short * const linux_keytabs[] = {
643 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
644 };
645 #endif
646 
647 static struct biosdisk_info *
648 fd2biosinfo(p, fp)
649 	struct proc *p;
650 	struct file *fp;
651 {
652 	struct vnode *vp;
653 	const char *blkname;
654 	char diskname[16];
655 	int i;
656 	struct nativedisk_info *nip;
657 	struct disklist *dl = i386_alldisks;
658 
659 	if (fp->f_type != DTYPE_VNODE)
660 		return NULL;
661 	vp = (struct vnode *)fp->f_data;
662 
663 	if (vp->v_type != VBLK)
664 		return NULL;
665 
666 	blkname = devsw_blk2name(major(vp->v_rdev));
667 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
668 	    DISKUNIT(vp->v_rdev));
669 
670 	for (i = 0; i < dl->dl_nnativedisks; i++) {
671 		nip = &dl->dl_nativedisks[i];
672 		if (strcmp(diskname, nip->ni_devname))
673 			continue;
674 		if (nip->ni_nmatches != 0)
675 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
676 	}
677 
678 	return NULL;
679 }
680 
681 
682 /*
683  * We come here in a last attempt to satisfy a Linux ioctl() call
684  */
685 int
686 linux_machdepioctl(p, v, retval)
687 	struct proc *p;
688 	void *v;
689 	register_t *retval;
690 {
691 	struct linux_sys_ioctl_args /* {
692 		syscallarg(int) fd;
693 		syscallarg(u_long) com;
694 		syscallarg(caddr_t) data;
695 	} */ *uap = v;
696 	struct sys_ioctl_args bia;
697 	u_long com;
698 	int error, error1;
699 #if (NWSDISPLAY > 0)
700 	struct vt_mode lvt;
701 	caddr_t bvtp, sg;
702 	struct kbentry kbe;
703 #endif
704 	struct linux_hd_geometry hdg;
705 	struct linux_hd_big_geometry hdg_big;
706 	struct biosdisk_info *bip;
707 	struct filedesc *fdp;
708 	struct file *fp;
709 	int fd;
710 	struct disklabel label, *labp;
711 	struct partinfo partp;
712 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
713 	u_long start, biostotal, realtotal;
714 	u_char heads, sectors;
715 	u_int cylinders;
716 	struct ioctl_pt pt;
717 
718 	fd = SCARG(uap, fd);
719 	SCARG(&bia, fd) = fd;
720 	SCARG(&bia, data) = SCARG(uap, data);
721 	com = SCARG(uap, com);
722 
723 	fdp = p->p_fd;
724 
725 	if ((fp = fd_getfile(fdp, fd)) == NULL)
726 		return (EBADF);
727 
728 	switch (com) {
729 #if (NWSDISPLAY > 0)
730 	case LINUX_KDGKBMODE:
731 		com = KDGKBMODE;
732 		break;
733 	case LINUX_KDSKBMODE:
734 		com = KDSKBMODE;
735 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
736 			SCARG(&bia, data) = (caddr_t)K_RAW;
737 		break;
738 	case LINUX_KIOCSOUND:
739 		SCARG(&bia, data) =
740 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
741 		/* fall through */
742 	case LINUX_KDMKTONE:
743 		com = KDMKTONE;
744 		break;
745 	case LINUX_KDSETMODE:
746 		com = KDSETMODE;
747 		break;
748 	case LINUX_KDGETMODE:
749 		/* KD_* values are equal to the wscons numbers */
750 		com = WSDISPLAYIO_GMODE;
751 		break;
752 	case LINUX_KDENABIO:
753 		com = KDENABIO;
754 		break;
755 	case LINUX_KDDISABIO:
756 		com = KDDISABIO;
757 		break;
758 	case LINUX_KDGETLED:
759 		com = KDGETLED;
760 		break;
761 	case LINUX_KDSETLED:
762 		com = KDSETLED;
763 		break;
764 	case LINUX_VT_OPENQRY:
765 		com = VT_OPENQRY;
766 		break;
767 	case LINUX_VT_GETMODE:
768 		SCARG(&bia, com) = VT_GETMODE;
769 		if ((error = sys_ioctl(p, &bia, retval)))
770 			return error;
771 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
772 		    sizeof (struct vt_mode))))
773 			return error;
774 		lvt.relsig = native_to_linux_signo[lvt.relsig];
775 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
776 		lvt.frsig = native_to_linux_signo[lvt.frsig];
777 		return copyout((caddr_t)&lvt, SCARG(uap, data),
778 		    sizeof (struct vt_mode));
779 	case LINUX_VT_SETMODE:
780 		com = VT_SETMODE;
781 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
782 		    sizeof (struct vt_mode))))
783 			return error;
784 		lvt.relsig = linux_to_native_signo[lvt.relsig];
785 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
786 		lvt.frsig = linux_to_native_signo[lvt.frsig];
787 		sg = stackgap_init(p, 0);
788 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
789 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
790 			return error;
791 		SCARG(&bia, data) = bvtp;
792 		break;
793 	case LINUX_VT_DISALLOCATE:
794 		/* XXX should use WSDISPLAYIO_DELSCREEN */
795 		return 0;
796 	case LINUX_VT_RELDISP:
797 		com = VT_RELDISP;
798 		break;
799 	case LINUX_VT_ACTIVATE:
800 		com = VT_ACTIVATE;
801 		break;
802 	case LINUX_VT_WAITACTIVE:
803 		com = VT_WAITACTIVE;
804 		break;
805 	case LINUX_VT_GETSTATE:
806 		com = VT_GETSTATE;
807 		break;
808 	case LINUX_KDGKBTYPE:
809 		/* This is what Linux does. */
810 		return (subyte(SCARG(uap, data), KB_101));
811 	case LINUX_KDGKBENT:
812 		/*
813 		 * The Linux KDGKBENT ioctl is different from the
814 		 * SYSV original. So we handle it in machdep code.
815 		 * XXX We should use keyboard mapping information
816 		 * from wsdisplay, but this would be expensive.
817 		 */
818 		if ((error = copyin(SCARG(uap, data), &kbe,
819 				    sizeof(struct kbentry))))
820 			return (error);
821 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
822 		    || kbe.kb_index >= NR_KEYS)
823 			return (EINVAL);
824 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
825 		return (copyout(&kbe, SCARG(uap, data),
826 				sizeof(struct kbentry)));
827 #endif
828 	case LINUX_HDIO_GETGEO:
829 	case LINUX_HDIO_GETGEO_BIG:
830 		/*
831 		 * Try to mimic Linux behaviour: return the BIOS geometry
832 		 * if possible (extending its # of cylinders if it's beyond
833 		 * the 1023 limit), fall back to the MI geometry (i.e.
834 		 * the real geometry) if not found, by returning an
835 		 * error. See common/linux_hdio.c
836 		 */
837 		FILE_USE(fp);
838 		bip = fd2biosinfo(p, fp);
839 		ioctlf = fp->f_ops->fo_ioctl;
840 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
841 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
842 		FILE_UNUSE(fp, p);
843 		if (error != 0 && error1 != 0)
844 			return error1;
845 		labp = error != 0 ? &label : partp.disklab;
846 		start = error1 != 0 ? partp.part->p_offset : 0;
847 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
848 		    && bip->bi_cyl != 0) {
849 			heads = bip->bi_head;
850 			sectors = bip->bi_sec;
851 			cylinders = bip->bi_cyl;
852 			biostotal = heads * sectors * cylinders;
853 			realtotal = labp->d_ntracks * labp->d_nsectors *
854 			    labp->d_ncylinders;
855 			if (realtotal > biostotal)
856 				cylinders = realtotal / (heads * sectors);
857 		} else {
858 			heads = labp->d_ntracks;
859 			cylinders = labp->d_ncylinders;
860 			sectors = labp->d_nsectors;
861 		}
862 		if (com == LINUX_HDIO_GETGEO) {
863 			hdg.start = start;
864 			hdg.heads = heads;
865 			hdg.cylinders = cylinders;
866 			hdg.sectors = sectors;
867 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
868 		} else {
869 			hdg_big.start = start;
870 			hdg_big.heads = heads;
871 			hdg_big.cylinders = cylinders;
872 			hdg_big.sectors = sectors;
873 			return copyout(&hdg_big, SCARG(uap, data),
874 			    sizeof hdg_big);
875 		}
876 
877 	default:
878 		/*
879 		 * Unknown to us. If it's on a device, just pass it through
880 		 * using PTIOCLINUX, the device itself might be able to
881 		 * make some sense of it.
882 		 * XXX hack: if the function returns EJUSTRETURN,
883 		 * it has stuffed a sysctl return value in pt.data.
884 		 */
885 		FILE_USE(fp);
886 		ioctlf = fp->f_ops->fo_ioctl;
887 		pt.com = SCARG(uap, com);
888 		pt.data = SCARG(uap, data);
889 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
890 		FILE_UNUSE(fp, p);
891 		if (error == EJUSTRETURN) {
892 			retval[0] = (register_t)pt.data;
893 			error = 0;
894 		}
895 
896 		if (error == ENOTTY)
897 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
898 			    com));
899 		return error;
900 	}
901 	SCARG(&bia, com) = com;
902 	return sys_ioctl(p, &bia, retval);
903 }
904 
905 /*
906  * Set I/O permissions for a process. Just set the maximum level
907  * right away (ignoring the argument), otherwise we would have
908  * to rely on I/O permission maps, which are not implemented.
909  */
910 int
911 linux_sys_iopl(p, v, retval)
912 	struct proc *p;
913 	void *v;
914 	register_t *retval;
915 {
916 #if 0
917 	struct linux_sys_iopl_args /* {
918 		syscallarg(int) level;
919 	} */ *uap = v;
920 #endif
921 	struct trapframe *fp = p->p_md.md_regs;
922 
923 	if (suser(p->p_ucred, &p->p_acflag) != 0)
924 		return EPERM;
925 	fp->tf_eflags |= PSL_IOPL;
926 	*retval = 0;
927 	return 0;
928 }
929 
930 /*
931  * See above. If a root process tries to set access to an I/O port,
932  * just let it have the whole range.
933  */
934 int
935 linux_sys_ioperm(p, v, retval)
936 	struct proc *p;
937 	void *v;
938 	register_t *retval;
939 {
940 	struct linux_sys_ioperm_args /* {
941 		syscallarg(unsigned int) lo;
942 		syscallarg(unsigned int) hi;
943 		syscallarg(int) val;
944 	} */ *uap = v;
945 	struct trapframe *fp = p->p_md.md_regs;
946 
947 	if (suser(p->p_ucred, &p->p_acflag) != 0)
948 		return EPERM;
949 	if (SCARG(uap, val))
950 		fp->tf_eflags |= PSL_IOPL;
951 	*retval = 0;
952 	return 0;
953 }
954