xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 0d1f6a757d4cd2924400dcd163348ea2873ca3d0)
1 /*	$NetBSD: linux_machdep.c,v 1.70 2002/02/16 16:23:09 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.70 2002/02/16 16:23:09 christos Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/map.h>
52 #include <sys/proc.h>
53 #include <sys/user.h>
54 #include <sys/buf.h>
55 #include <sys/reboot.h>
56 #include <sys/conf.h>
57 #include <sys/exec.h>
58 #include <sys/file.h>
59 #include <sys/callout.h>
60 #include <sys/malloc.h>
61 #include <sys/mbuf.h>
62 #include <sys/msgbuf.h>
63 #include <sys/mount.h>
64 #include <sys/vnode.h>
65 #include <sys/device.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <miscfs/specfs/specdev.h>
72 
73 #include <compat/linux/common/linux_types.h>
74 #include <compat/linux/common/linux_signal.h>
75 #include <compat/linux/common/linux_util.h>
76 #include <compat/linux/common/linux_ioctl.h>
77 #include <compat/linux/common/linux_hdio.h>
78 #include <compat/linux/common/linux_exec.h>
79 #include <compat/linux/common/linux_machdep.h>
80 
81 #include <compat/linux/linux_syscallargs.h>
82 
83 #include <machine/cpu.h>
84 #include <machine/cpufunc.h>
85 #include <machine/psl.h>
86 #include <machine/reg.h>
87 #include <machine/segments.h>
88 #include <machine/specialreg.h>
89 #include <machine/sysarch.h>
90 #include <machine/vm86.h>
91 #include <machine/vmparam.h>
92 
93 /*
94  * To see whether wscons is configured (for virtual console ioctl calls).
95  */
96 #if defined(_KERNEL_OPT)
97 #include "wsdisplay.h"
98 #endif
99 #if (NWSDISPLAY > 0)
100 #include <dev/wscons/wsconsio.h>
101 #include <dev/wscons/wsdisplay_usl_io.h>
102 #if defined(_KERNEL_OPT)
103 #include "opt_xserver.h"
104 #endif
105 #endif
106 
107 #ifdef USER_LDT
108 #include <machine/cpu.h>
109 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
110     register_t *));
111 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
112     register_t *));
113 #endif
114 
115 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
116 extern struct disklist *i386_alldisks;
117 extern const char *findblkname __P((int));
118 
119 /*
120  * Deal with some i386-specific things in the Linux emulation code.
121  */
122 
123 void
124 linux_setregs(p, epp, stack)
125 	struct proc *p;
126 	struct exec_package *epp;
127 	u_long stack;
128 {
129 	struct pcb *pcb = &p->p_addr->u_pcb;
130 
131 	setregs(p, epp, stack);
132 	if (i386_use_fxsave)
133 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
134 	else
135 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
136 }
137 
138 /*
139  * Send an interrupt to process.
140  *
141  * Stack is set up to allow sigcode stored
142  * in u. to call routine, followed by kcall
143  * to sigreturn routine below.  After sigreturn
144  * resets the signal mask, the stack, and the
145  * frame pointer, it returns to the user
146  * specified pc, psl.
147  */
148 
149 void
150 linux_sendsig(catcher, sig, mask, code)
151 	sig_t catcher;
152 	int sig;
153 	sigset_t *mask;
154 	u_long code;
155 {
156 	struct proc *p = curproc;
157 	struct trapframe *tf;
158 	struct linux_sigframe *fp, frame;
159 	int onstack;
160 
161 	tf = p->p_md.md_regs;
162 
163 	/* Do we need to jump onto the signal stack? */
164 	onstack =
165 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
166 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
167 
168 	/* Allocate space for the signal handler context. */
169 	if (onstack)
170 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
171 					  p->p_sigctx.ps_sigstk.ss_size);
172 	else
173 		fp = (struct linux_sigframe *)tf->tf_esp;
174 	fp--;
175 
176 	/* Build stack frame for signal trampoline. */
177 	frame.sf_handler = catcher;
178 	frame.sf_sig = native_to_linux_sig[sig];
179 
180 	/* Save register context. */
181 #ifdef VM86
182 	if (tf->tf_eflags & PSL_VM) {
183 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
184 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
185 		frame.sf_sc.sc_es = tf->tf_vm86_es;
186 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
187 		frame.sf_sc.sc_eflags = get_vflags(p);
188 	} else
189 #endif
190 	{
191 		frame.sf_sc.sc_gs = tf->tf_gs;
192 		frame.sf_sc.sc_fs = tf->tf_fs;
193 		frame.sf_sc.sc_es = tf->tf_es;
194 		frame.sf_sc.sc_ds = tf->tf_ds;
195 		frame.sf_sc.sc_eflags = tf->tf_eflags;
196 	}
197 	frame.sf_sc.sc_edi = tf->tf_edi;
198 	frame.sf_sc.sc_esi = tf->tf_esi;
199 	frame.sf_sc.sc_ebp = tf->tf_ebp;
200 	frame.sf_sc.sc_ebx = tf->tf_ebx;
201 	frame.sf_sc.sc_edx = tf->tf_edx;
202 	frame.sf_sc.sc_ecx = tf->tf_ecx;
203 	frame.sf_sc.sc_eax = tf->tf_eax;
204 	frame.sf_sc.sc_eip = tf->tf_eip;
205 	frame.sf_sc.sc_cs = tf->tf_cs;
206 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
207 	frame.sf_sc.sc_ss = tf->tf_ss;
208 	frame.sf_sc.sc_err = tf->tf_err;
209 	frame.sf_sc.sc_trapno = tf->tf_trapno;
210 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
211 
212 	/* Save signal stack. */
213 	/* Linux doesn't save the onstack flag in sigframe */
214 
215 	/* Save signal mask. */
216 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
217 
218 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
219 		/*
220 		 * Process has trashed its stack; give it an illegal
221 		 * instruction to halt it in its tracks.
222 		 */
223 		sigexit(p, SIGILL);
224 		/* NOTREACHED */
225 	}
226 
227 	/*
228 	 * Build context to run handler in.
229 	 */
230 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
231 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
232 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
233 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
234 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
235 	tf->tf_esp = (int)fp;
236 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
237 
238 	/* Remember that we're now on the signal stack. */
239 	if (onstack)
240 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
241 }
242 
/*
 * System calls to clean up state after a signal has been taken.
 * They reset the signal mask and stack state from the context left by
 * linux_sendsig() and return to the previous pc and psl recorded in
 * that context, checking carefully that the user has not modified the
 * psl to gain improper privileges or to cause a machine fault.
 */

/*
 * The rt_ variant is not implemented yet; it always fails with ENOSYS.
 */
int
linux_sys_rt_sigreturn(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	/* XXX XAX write me */
	return ENOSYS;
}
262 
263 int
264 linux_sys_sigreturn(p, v, retval)
265 	struct proc *p;
266 	void *v;
267 	register_t *retval;
268 {
269 	struct linux_sys_sigreturn_args /* {
270 		syscallarg(struct linux_sigcontext *) scp;
271 	} */ *uap = v;
272 	struct linux_sigcontext *scp, context;
273 	struct trapframe *tf;
274 	sigset_t mask;
275 	ssize_t ss_gap;
276 
277 	/*
278 	 * The trampoline code hands us the context.
279 	 * It is unsafe to keep track of it ourselves, in the event that a
280 	 * program jumps out of a signal handler.
281 	 */
282 	scp = SCARG(uap, scp);
283 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
284 		return (EFAULT);
285 
286 	/* Restore register context. */
287 	tf = p->p_md.md_regs;
288 #ifdef VM86
289 	if (context.sc_eflags & PSL_VM) {
290 		tf->tf_vm86_gs = context.sc_gs;
291 		tf->tf_vm86_fs = context.sc_fs;
292 		tf->tf_vm86_es = context.sc_es;
293 		tf->tf_vm86_ds = context.sc_ds;
294 		set_vflags(p, context.sc_eflags);
295 	} else
296 #endif
297 	{
298 		/*
299 		 * Check for security violations.  If we're returning to
300 		 * protected mode, the CPU will validate the segment registers
301 		 * automatically and generate a trap on violations.  We handle
302 		 * the trap, rather than doing all of the checking here.
303 		 */
304 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
305 		    !USERMODE(context.sc_cs, context.sc_eflags))
306 			return (EINVAL);
307 
308 		/* %fs and %gs were restored by the trampoline. */
309 		tf->tf_es = context.sc_es;
310 		tf->tf_ds = context.sc_ds;
311 		tf->tf_eflags = context.sc_eflags;
312 	}
313 	tf->tf_edi = context.sc_edi;
314 	tf->tf_esi = context.sc_esi;
315 	tf->tf_ebp = context.sc_ebp;
316 	tf->tf_ebx = context.sc_ebx;
317 	tf->tf_edx = context.sc_edx;
318 	tf->tf_ecx = context.sc_ecx;
319 	tf->tf_eax = context.sc_eax;
320 	tf->tf_eip = context.sc_eip;
321 	tf->tf_cs = context.sc_cs;
322 	tf->tf_esp = context.sc_esp_at_signal;
323 	tf->tf_ss = context.sc_ss;
324 
325 	/* Restore signal stack. */
326 	/*
327 	 * Linux really does it this way; it doesn't have space in sigframe
328 	 * to save the onstack flag.
329 	 */
330 	ss_gap = (ssize_t)
331 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
332 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
333 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
334 	else
335 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
336 
337 	/* Restore signal mask. */
338 	linux_old_to_native_sigset(&mask, &context.sc_mask);
339 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
340 
341 	return (EJUSTRETURN);
342 }
343 
344 #ifdef USER_LDT
345 
346 int
347 linux_read_ldt(p, uap, retval)
348 	struct proc *p;
349 	struct linux_sys_modify_ldt_args /* {
350 		syscallarg(int) func;
351 		syscallarg(void *) ptr;
352 		syscallarg(size_t) bytecount;
353 	} */ *uap;
354 	register_t *retval;
355 {
356 	struct i386_get_ldt_args gl;
357 	int error;
358 	caddr_t sg;
359 	char *parms;
360 
361 	sg = stackgap_init(p->p_emul);
362 
363 	gl.start = 0;
364 	gl.desc = SCARG(uap, ptr);
365 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
366 
367 	parms = stackgap_alloc(&sg, sizeof(gl));
368 
369 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
370 		return (error);
371 
372 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
373 		return (error);
374 
375 	*retval *= sizeof(union descriptor);
376 	return (0);
377 }
378 
/*
 * Argument block a Linux program passes to modify_ldt() when writing
 * an LDT entry; linux_write_ldt() converts it to a segment descriptor.
 */
struct linux_ldt_info {
	unsigned int	entry_number;		/* LDT slot to modify */
	unsigned long	base_addr;		/* segment base */
	unsigned int	limit;			/* segment limit */
	unsigned int	seg_32bit:1;		/* becomes sd_def32 */
	unsigned int	contents:2;		/* type bits; 3 is rejected */
	unsigned int	read_exec_only:1;	/* clear => writable/readable bit set */
	unsigned int	limit_in_pages:1;	/* becomes sd_gran */
	unsigned int	seg_not_present:1;	/* inverted into sd_p */
};
389 
390 int
391 linux_write_ldt(p, uap, retval)
392 	struct proc *p;
393 	struct linux_sys_modify_ldt_args /* {
394 		syscallarg(int) func;
395 		syscallarg(void *) ptr;
396 		syscallarg(size_t) bytecount;
397 	} */ *uap;
398 	register_t *retval;
399 {
400 	struct linux_ldt_info ldt_info;
401 	struct segment_descriptor sd;
402 	struct i386_set_ldt_args sl;
403 	int error;
404 	caddr_t sg;
405 	char *parms;
406 
407 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
408 		return (EINVAL);
409 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
410 		return error;
411 	if (ldt_info.contents == 3)
412 		return (EINVAL);
413 
414 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
415 		/* this means you should zero the ldt */
416 		(void)memset(&sd, 0, sizeof(sd));
417 	} else {
418 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
419 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
420 		sd.sd_lolimit = ldt_info.limit & 0xffff;
421 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
422 		sd.sd_type = 16 | (ldt_info.contents << 2) |
423 		    (!ldt_info.read_exec_only << 1);
424 		sd.sd_dpl = SEL_UPL;
425 		sd.sd_p = !ldt_info.seg_not_present;
426 		sd.sd_def32 = ldt_info.seg_32bit;
427 		sd.sd_gran = ldt_info.limit_in_pages;
428 	}
429 	sg = stackgap_init(p->p_emul);
430 	sl.start = ldt_info.entry_number;
431 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
432 	sl.num = 1;
433 
434 #if 0
435 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
436 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
437 #endif
438 
439 	parms = stackgap_alloc(&sg, sizeof(sl));
440 
441 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
442 		return (error);
443 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
444 		return (error);
445 
446 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
447 		return (error);
448 
449 	*retval = 0;
450 	return (0);
451 }
452 
453 #endif /* USER_LDT */
454 
455 int
456 linux_sys_modify_ldt(p, v, retval)
457 	struct proc *p;
458 	void *v;
459 	register_t *retval;
460 {
461 	struct linux_sys_modify_ldt_args /* {
462 		syscallarg(int) func;
463 		syscallarg(void *) ptr;
464 		syscallarg(size_t) bytecount;
465 	} */ *uap = v;
466 
467 	switch (SCARG(uap, func)) {
468 #ifdef USER_LDT
469 	case 0:
470 		return (linux_read_ldt(p, uap, retval));
471 
472 	case 1:
473 		return (linux_write_ldt(p, uap, retval));
474 #endif /* USER_LDT */
475 
476 	default:
477 		return (ENOSYS);
478 	}
479 }
480 
/*
 * XXX Pathetic hack to make svgalib work.  For a raw device number,
 * an opened wscons VT is reported with the Linux console major (and
 * minor + 1) so that svgalib likes it; any other raw device number
 * becomes 0.  Non-raw device numbers pass through untouched.
 * Should probably do it 'wrong the right way' and use a mapping
 * array for all major device numbers, and map linux_mknod too.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif
	return 0;
}
502 
503 #if (NWSDISPLAY > 0)
504 /*
505  * That's not complete, but enough to get an X server running.
506  */
507 #define NR_KEYS 128
508 static const u_short plain_map[NR_KEYS] = {
509 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
510 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
511 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
512 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
513 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
514 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
515 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
516 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
517 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
518 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
519 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
520 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
521 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
522 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
523 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
524 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
525 }, shift_map[NR_KEYS] = {
526 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
527 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
528 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
529 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
530 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
531 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
532 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
533 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
534 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
535 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
536 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
537 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
538 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
539 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
540 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
541 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
542 }, altgr_map[NR_KEYS] = {
543 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
544 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
545 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
546 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
547 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
548 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
549 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
550 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
551 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
552 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
553 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
554 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
555 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
556 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
557 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
558 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
559 }, ctrl_map[NR_KEYS] = {
560 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
561 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
562 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
563 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
564 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
565 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
566 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
567 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
568 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
569 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
570 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
571 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
572 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
573 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
574 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
575 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
576 };
577 
578 const u_short * const linux_keytabs[] = {
579 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
580 };
581 #endif
582 
583 static struct biosdisk_info *
584 fd2biosinfo(p, fp)
585 	struct proc *p;
586 	struct file *fp;
587 {
588 	struct vnode *vp;
589 	const char *blkname;
590 	char diskname[16];
591 	int i;
592 	struct nativedisk_info *nip;
593 	struct disklist *dl = i386_alldisks;
594 
595 	if (fp->f_type != DTYPE_VNODE)
596 		return NULL;
597 	vp = (struct vnode *)fp->f_data;
598 
599 	if (vp->v_type != VBLK)
600 		return NULL;
601 
602 	blkname = findblkname(major(vp->v_rdev));
603 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
604 	    DISKUNIT(vp->v_rdev));
605 
606 	for (i = 0; i < dl->dl_nnativedisks; i++) {
607 		nip = &dl->dl_nativedisks[i];
608 		if (strcmp(diskname, nip->ni_devname))
609 			continue;
610 		if (nip->ni_nmatches != 0)
611 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
612 	}
613 
614 	return NULL;
615 }
616 
617 
618 /*
619  * We come here in a last attempt to satisfy a Linux ioctl() call
620  */
621 int
622 linux_machdepioctl(p, v, retval)
623 	struct proc *p;
624 	void *v;
625 	register_t *retval;
626 {
627 	struct linux_sys_ioctl_args /* {
628 		syscallarg(int) fd;
629 		syscallarg(u_long) com;
630 		syscallarg(caddr_t) data;
631 	} */ *uap = v;
632 	struct sys_ioctl_args bia;
633 	u_long com;
634 	int error, error1;
635 #if (NWSDISPLAY > 0)
636 	struct vt_mode lvt;
637 	caddr_t bvtp, sg;
638 	struct kbentry kbe;
639 #endif
640 	struct linux_hd_geometry hdg;
641 	struct linux_hd_big_geometry hdg_big;
642 	struct biosdisk_info *bip;
643 	struct filedesc *fdp;
644 	struct file *fp;
645 	int fd;
646 	struct disklabel label, *labp;
647 	struct partinfo partp;
648 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
649 	u_long start, biostotal, realtotal;
650 	u_char heads, sectors;
651 	u_int cylinders;
652 	struct ioctl_pt pt;
653 
654 	fd = SCARG(uap, fd);
655 	SCARG(&bia, fd) = fd;
656 	SCARG(&bia, data) = SCARG(uap, data);
657 	com = SCARG(uap, com);
658 
659 	fdp = p->p_fd;
660 
661 	if ((fp = fd_getfile(fdp, fd)) == NULL)
662 		return (EBADF);
663 
664 	switch (com) {
665 #if (NWSDISPLAY > 0)
666 	case LINUX_KDGKBMODE:
667 		com = KDGKBMODE;
668 		break;
669 	case LINUX_KDSKBMODE:
670 		com = KDSKBMODE;
671 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
672 			SCARG(&bia, data) = (caddr_t)K_RAW;
673 		break;
674 	case LINUX_KIOCSOUND:
675 		SCARG(&bia, data) =
676 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
677 		/* fall through */
678 	case LINUX_KDMKTONE:
679 		com = KDMKTONE;
680 		break;
681 	case LINUX_KDSETMODE:
682 		com = KDSETMODE;
683 		break;
684 	case LINUX_KDGETMODE:
685 		/* KD_* values are equal to the wscons numbers */
686 		com = WSDISPLAYIO_GMODE;
687 		break;
688 	case LINUX_KDENABIO:
689 		com = KDENABIO;
690 		break;
691 	case LINUX_KDDISABIO:
692 		com = KDDISABIO;
693 		break;
694 	case LINUX_KDGETLED:
695 		com = KDGETLED;
696 		break;
697 	case LINUX_KDSETLED:
698 		com = KDSETLED;
699 		break;
700 	case LINUX_VT_OPENQRY:
701 		com = VT_OPENQRY;
702 		break;
703 	case LINUX_VT_GETMODE:
704 		SCARG(&bia, com) = VT_GETMODE;
705 		if ((error = sys_ioctl(p, &bia, retval)))
706 			return error;
707 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
708 		    sizeof (struct vt_mode))))
709 			return error;
710 		lvt.relsig = native_to_linux_sig[lvt.relsig];
711 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
712 		lvt.frsig = native_to_linux_sig[lvt.frsig];
713 		return copyout((caddr_t)&lvt, SCARG(uap, data),
714 		    sizeof (struct vt_mode));
715 	case LINUX_VT_SETMODE:
716 		com = VT_SETMODE;
717 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
718 		    sizeof (struct vt_mode))))
719 			return error;
720 		lvt.relsig = linux_to_native_sig[lvt.relsig];
721 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
722 		lvt.frsig = linux_to_native_sig[lvt.frsig];
723 		sg = stackgap_init(p->p_emul);
724 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
725 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
726 			return error;
727 		SCARG(&bia, data) = bvtp;
728 		break;
729 	case LINUX_VT_DISALLOCATE:
730 		/* XXX should use WSDISPLAYIO_DELSCREEN */
731 		return 0;
732 	case LINUX_VT_RELDISP:
733 		com = VT_RELDISP;
734 		break;
735 	case LINUX_VT_ACTIVATE:
736 		com = VT_ACTIVATE;
737 		break;
738 	case LINUX_VT_WAITACTIVE:
739 		com = VT_WAITACTIVE;
740 		break;
741 	case LINUX_VT_GETSTATE:
742 		com = VT_GETSTATE;
743 		break;
744 	case LINUX_KDGKBTYPE:
745 		/* This is what Linux does. */
746 		return (subyte(SCARG(uap, data), KB_101));
747 	case LINUX_KDGKBENT:
748 		/*
749 		 * The Linux KDGKBENT ioctl is different from the
750 		 * SYSV original. So we handle it in machdep code.
751 		 * XXX We should use keyboard mapping information
752 		 * from wsdisplay, but this would be expensive.
753 		 */
754 		if ((error = copyin(SCARG(uap, data), &kbe,
755 				    sizeof(struct kbentry))))
756 			return (error);
757 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
758 		    || kbe.kb_index >= NR_KEYS)
759 			return (EINVAL);
760 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
761 		return (copyout(&kbe, SCARG(uap, data),
762 				sizeof(struct kbentry)));
763 #endif
764 	case LINUX_HDIO_GETGEO:
765 	case LINUX_HDIO_GETGEO_BIG:
766 		/*
767 		 * Try to mimic Linux behaviour: return the BIOS geometry
768 		 * if possible (extending its # of cylinders if it's beyond
769 		 * the 1023 limit), fall back to the MI geometry (i.e.
770 		 * the real geometry) if not found, by returning an
771 		 * error. See common/linux_hdio.c
772 		 */
773 		FILE_USE(fp);
774 		bip = fd2biosinfo(p, fp);
775 		ioctlf = fp->f_ops->fo_ioctl;
776 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
777 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
778 		FILE_UNUSE(fp, p);
779 		if (error != 0 && error1 != 0)
780 			return error1;
781 		labp = error != 0 ? &label : partp.disklab;
782 		start = error1 != 0 ? partp.part->p_offset : 0;
783 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
784 		    && bip->bi_cyl != 0) {
785 			heads = bip->bi_head;
786 			sectors = bip->bi_sec;
787 			cylinders = bip->bi_cyl;
788 			biostotal = heads * sectors * cylinders;
789 			realtotal = labp->d_ntracks * labp->d_nsectors *
790 			    labp->d_ncylinders;
791 			if (realtotal > biostotal)
792 				cylinders = realtotal / (heads * sectors);
793 		} else {
794 			heads = labp->d_ntracks;
795 			cylinders = labp->d_ncylinders;
796 			sectors = labp->d_nsectors;
797 		}
798 		if (com == LINUX_HDIO_GETGEO) {
799 			hdg.start = start;
800 			hdg.heads = heads;
801 			hdg.cylinders = cylinders;
802 			hdg.sectors = sectors;
803 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
804 		} else {
805 			hdg_big.start = start;
806 			hdg_big.heads = heads;
807 			hdg_big.cylinders = cylinders;
808 			hdg_big.sectors = sectors;
809 			return copyout(&hdg_big, SCARG(uap, data),
810 			    sizeof hdg_big);
811 		}
812 		return 0;
813 
814 	default:
815 		/*
816 		 * Unknown to us. If it's on a device, just pass it through
817 		 * using PTIOCLINUX, the device itself might be able to
818 		 * make some sense of it.
819 		 * XXX hack: if the function returns EJUSTRETURN,
820 		 * it has stuffed a sysctl return value in pt.data.
821 		 */
822 		FILE_USE(fp);
823 		ioctlf = fp->f_ops->fo_ioctl;
824 		pt.com = SCARG(uap, com);
825 		pt.data = SCARG(uap, data);
826 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
827 		FILE_UNUSE(fp, p);
828 		if (error == EJUSTRETURN) {
829 			retval[0] = (register_t)pt.data;
830 			error = 0;
831 		}
832 
833 		if (error == ENOTTY)
834 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
835 			    com);
836 		return error;
837 	}
838 	SCARG(&bia, com) = com;
839 	return sys_ioctl(p, &bia, retval);
840 }
841 
842 /*
843  * Set I/O permissions for a process. Just set the maximum level
844  * right away (ignoring the argument), otherwise we would have
845  * to rely on I/O permission maps, which are not implemented.
846  */
847 int
848 linux_sys_iopl(p, v, retval)
849 	struct proc *p;
850 	void *v;
851 	register_t *retval;
852 {
853 #if 0
854 	struct linux_sys_iopl_args /* {
855 		syscallarg(int) level;
856 	} */ *uap = v;
857 #endif
858 	struct trapframe *fp = p->p_md.md_regs;
859 
860 	if (suser(p->p_ucred, &p->p_acflag) != 0)
861 		return EPERM;
862 	fp->tf_eflags |= PSL_IOPL;
863 	*retval = 0;
864 	return 0;
865 }
866 
867 /*
868  * See above. If a root process tries to set access to an I/O port,
869  * just let it have the whole range.
870  */
871 int
872 linux_sys_ioperm(p, v, retval)
873 	struct proc *p;
874 	void *v;
875 	register_t *retval;
876 {
877 	struct linux_sys_ioperm_args /* {
878 		syscallarg(unsigned int) lo;
879 		syscallarg(unsigned int) hi;
880 		syscallarg(int) val;
881 	} */ *uap = v;
882 	struct trapframe *fp = p->p_md.md_regs;
883 
884 	if (suser(p->p_ucred, &p->p_acflag) != 0)
885 		return EPERM;
886 	if (SCARG(uap, val))
887 		fp->tf_eflags |= PSL_IOPL;
888 	*retval = 0;
889 	return 0;
890 }
891