xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision b519c70ad771d0a55b3c2277db6b97a05fa6465d)
1 /*	$NetBSD: linux_machdep.c,v 1.68 2001/11/13 02:08:38 lukem Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.68 2001/11/13 02:08:38 lukem Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/map.h>
52 #include <sys/proc.h>
53 #include <sys/user.h>
54 #include <sys/buf.h>
55 #include <sys/reboot.h>
56 #include <sys/conf.h>
57 #include <sys/exec.h>
58 #include <sys/file.h>
59 #include <sys/callout.h>
60 #include <sys/malloc.h>
61 #include <sys/mbuf.h>
62 #include <sys/msgbuf.h>
63 #include <sys/mount.h>
64 #include <sys/vnode.h>
65 #include <sys/device.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <miscfs/specfs/specdev.h>
72 
73 #include <compat/linux/common/linux_types.h>
74 #include <compat/linux/common/linux_signal.h>
75 #include <compat/linux/common/linux_util.h>
76 #include <compat/linux/common/linux_ioctl.h>
77 #include <compat/linux/common/linux_hdio.h>
78 #include <compat/linux/common/linux_exec.h>
79 #include <compat/linux/common/linux_machdep.h>
80 
81 #include <compat/linux/linux_syscallargs.h>
82 
83 #include <machine/cpu.h>
84 #include <machine/cpufunc.h>
85 #include <machine/psl.h>
86 #include <machine/reg.h>
87 #include <machine/segments.h>
88 #include <machine/specialreg.h>
89 #include <machine/sysarch.h>
90 #include <machine/vm86.h>
91 #include <machine/vmparam.h>
92 
93 /*
94  * To see whether wscons is configured (for virtual console ioctl calls).
95  */
96 #if defined(_KERNEL_OPT)
97 #include "wsdisplay.h"
98 #endif
99 #if (NWSDISPLAY > 0)
100 #include <dev/wscons/wsconsio.h>
101 #include <dev/wscons/wsdisplay_usl_io.h>
102 #if defined(_KERNEL_OPT)
103 #include "opt_xserver.h"
104 #endif
105 #endif
106 
107 #ifdef USER_LDT
108 #include <machine/cpu.h>
109 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
110     register_t *));
111 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
112     register_t *));
113 #endif
114 
115 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
116 extern struct disklist *i386_alldisks;
117 extern const char *findblkname __P((int));
118 
119 /*
120  * Deal with some i386-specific things in the Linux emulation code.
121  */
122 
123 void
124 linux_setregs(p, epp, stack)
125 	struct proc *p;
126 	struct exec_package *epp;
127 	u_long stack;
128 {
129 	struct pcb *pcb = &p->p_addr->u_pcb;
130 
131 	setregs(p, epp, stack);
132 	if (i386_use_fxsave)
133 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
134 	else
135 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
136 }
137 
138 /*
139  * Send an interrupt to process.
140  *
141  * Stack is set up to allow sigcode stored
142  * in u. to call routine, followed by kcall
143  * to sigreturn routine below.  After sigreturn
144  * resets the signal mask, the stack, and the
145  * frame pointer, it returns to the user
146  * specified pc, psl.
147  */
148 
149 void
150 linux_sendsig(catcher, sig, mask, code)
151 	sig_t catcher;
152 	int sig;
153 	sigset_t *mask;
154 	u_long code;
155 {
156 	struct proc *p = curproc;
157 	struct trapframe *tf;
158 	struct linux_sigframe *fp, frame;
159 	int onstack;
160 
161 	tf = p->p_md.md_regs;
162 
163 	/* Do we need to jump onto the signal stack? */
164 	onstack =
165 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
166 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
167 
168 	/* Allocate space for the signal handler context. */
169 	if (onstack)
170 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
171 					  p->p_sigctx.ps_sigstk.ss_size);
172 	else
173 		fp = (struct linux_sigframe *)tf->tf_esp;
174 	fp--;
175 
176 	/* Build stack frame for signal trampoline. */
177 	frame.sf_handler = catcher;
178 	frame.sf_sig = native_to_linux_sig[sig];
179 
180 	/* Save register context. */
181 #ifdef VM86
182 	if (tf->tf_eflags & PSL_VM) {
183 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
184 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
185 		frame.sf_sc.sc_es = tf->tf_vm86_es;
186 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
187 		frame.sf_sc.sc_eflags = get_vflags(p);
188 	} else
189 #endif
190 	{
191 		frame.sf_sc.sc_gs = tf->tf_gs;
192 		frame.sf_sc.sc_fs = tf->tf_fs;
193 		frame.sf_sc.sc_es = tf->tf_es;
194 		frame.sf_sc.sc_ds = tf->tf_ds;
195 		frame.sf_sc.sc_eflags = tf->tf_eflags;
196 	}
197 	frame.sf_sc.sc_edi = tf->tf_edi;
198 	frame.sf_sc.sc_esi = tf->tf_esi;
199 	frame.sf_sc.sc_ebp = tf->tf_ebp;
200 	frame.sf_sc.sc_ebx = tf->tf_ebx;
201 	frame.sf_sc.sc_edx = tf->tf_edx;
202 	frame.sf_sc.sc_ecx = tf->tf_ecx;
203 	frame.sf_sc.sc_eax = tf->tf_eax;
204 	frame.sf_sc.sc_eip = tf->tf_eip;
205 	frame.sf_sc.sc_cs = tf->tf_cs;
206 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
207 	frame.sf_sc.sc_ss = tf->tf_ss;
208 	frame.sf_sc.sc_err = tf->tf_err;
209 	frame.sf_sc.sc_trapno = tf->tf_trapno;
210 
211 	/* Save signal stack. */
212 	/* Linux doesn't save the onstack flag in sigframe */
213 
214 	/* Save signal mask. */
215 	native_to_linux_old_sigset(mask, &frame.sf_sc.sc_mask);
216 
217 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
218 		/*
219 		 * Process has trashed its stack; give it an illegal
220 		 * instruction to halt it in its tracks.
221 		 */
222 		sigexit(p, SIGILL);
223 		/* NOTREACHED */
224 	}
225 
226 	/*
227 	 * Build context to run handler in.
228 	 */
229 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
230 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
231 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
232 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
233 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
234 	tf->tf_esp = (int)fp;
235 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
236 
237 	/* Remember that we're now on the signal stack. */
238 	if (onstack)
239 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
240 }
241 
242 /*
243  * System call to cleanup state after a signal
244  * has been taken.  Reset signal mask and
245  * stack state from context left by sendsig (above).
246  * Return to previous pc and psl as specified by
247  * context left by sendsig. Check carefully to
248  * make sure that the user has not modified the
249  * psl to gain improper privileges or to cause
250  * a machine fault.
251  */
252 int
253 linux_sys_rt_sigreturn(p, v, retval)
254 	struct proc *p;
255 	void *v;
256 	register_t *retval;
257 {
258 	/* XXX XAX write me */
259 	return(ENOSYS);
260 }
261 
262 int
263 linux_sys_sigreturn(p, v, retval)
264 	struct proc *p;
265 	void *v;
266 	register_t *retval;
267 {
268 	struct linux_sys_sigreturn_args /* {
269 		syscallarg(struct linux_sigcontext *) scp;
270 	} */ *uap = v;
271 	struct linux_sigcontext *scp, context;
272 	struct trapframe *tf;
273 	sigset_t mask;
274 	ssize_t ss_gap;
275 
276 	/*
277 	 * The trampoline code hands us the context.
278 	 * It is unsafe to keep track of it ourselves, in the event that a
279 	 * program jumps out of a signal handler.
280 	 */
281 	scp = SCARG(uap, scp);
282 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
283 		return (EFAULT);
284 
285 	/* Restore register context. */
286 	tf = p->p_md.md_regs;
287 #ifdef VM86
288 	if (context.sc_eflags & PSL_VM) {
289 		tf->tf_vm86_gs = context.sc_gs;
290 		tf->tf_vm86_fs = context.sc_fs;
291 		tf->tf_vm86_es = context.sc_es;
292 		tf->tf_vm86_ds = context.sc_ds;
293 		set_vflags(p, context.sc_eflags);
294 	} else
295 #endif
296 	{
297 		/*
298 		 * Check for security violations.  If we're returning to
299 		 * protected mode, the CPU will validate the segment registers
300 		 * automatically and generate a trap on violations.  We handle
301 		 * the trap, rather than doing all of the checking here.
302 		 */
303 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
304 		    !USERMODE(context.sc_cs, context.sc_eflags))
305 			return (EINVAL);
306 
307 		/* %fs and %gs were restored by the trampoline. */
308 		tf->tf_es = context.sc_es;
309 		tf->tf_ds = context.sc_ds;
310 		tf->tf_eflags = context.sc_eflags;
311 	}
312 	tf->tf_edi = context.sc_edi;
313 	tf->tf_esi = context.sc_esi;
314 	tf->tf_ebp = context.sc_ebp;
315 	tf->tf_ebx = context.sc_ebx;
316 	tf->tf_edx = context.sc_edx;
317 	tf->tf_ecx = context.sc_ecx;
318 	tf->tf_eax = context.sc_eax;
319 	tf->tf_eip = context.sc_eip;
320 	tf->tf_cs = context.sc_cs;
321 	tf->tf_esp = context.sc_esp_at_signal;
322 	tf->tf_ss = context.sc_ss;
323 
324 	/* Restore signal stack. */
325 	/*
326 	 * Linux really does it this way; it doesn't have space in sigframe
327 	 * to save the onstack flag.
328 	 */
329 	ss_gap = (ssize_t)
330 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
331 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
332 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
333 	else
334 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
335 
336 	/* Restore signal mask. */
337 	linux_old_to_native_sigset(&context.sc_mask, &mask);
338 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
339 
340 	return (EJUSTRETURN);
341 }
342 
343 #ifdef USER_LDT
344 
345 int
346 linux_read_ldt(p, uap, retval)
347 	struct proc *p;
348 	struct linux_sys_modify_ldt_args /* {
349 		syscallarg(int) func;
350 		syscallarg(void *) ptr;
351 		syscallarg(size_t) bytecount;
352 	} */ *uap;
353 	register_t *retval;
354 {
355 	struct i386_get_ldt_args gl;
356 	int error;
357 	caddr_t sg;
358 	char *parms;
359 
360 	sg = stackgap_init(p->p_emul);
361 
362 	gl.start = 0;
363 	gl.desc = SCARG(uap, ptr);
364 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
365 
366 	parms = stackgap_alloc(&sg, sizeof(gl));
367 
368 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
369 		return (error);
370 
371 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
372 		return (error);
373 
374 	*retval *= sizeof(union descriptor);
375 	return (0);
376 }
377 
378 struct linux_ldt_info {
379 	u_int entry_number;
380 	u_long base_addr;
381 	u_int limit;
382 	u_int seg_32bit:1;
383 	u_int contents:2;
384 	u_int read_exec_only:1;
385 	u_int limit_in_pages:1;
386 	u_int seg_not_present:1;
387 };
388 
389 int
390 linux_write_ldt(p, uap, retval)
391 	struct proc *p;
392 	struct linux_sys_modify_ldt_args /* {
393 		syscallarg(int) func;
394 		syscallarg(void *) ptr;
395 		syscallarg(size_t) bytecount;
396 	} */ *uap;
397 	register_t *retval;
398 {
399 	struct linux_ldt_info ldt_info;
400 	struct segment_descriptor sd;
401 	struct i386_set_ldt_args sl;
402 	int error;
403 	caddr_t sg;
404 	char *parms;
405 
406 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
407 		return (EINVAL);
408 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
409 		return error;
410 	if (ldt_info.contents == 3)
411 		return (EINVAL);
412 
413 	sg = stackgap_init(p->p_emul);
414 
415 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
416 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
417 	sd.sd_lolimit = ldt_info.limit & 0xffff;
418 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
419 	sd.sd_type =
420 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
421 	sd.sd_dpl = SEL_UPL;
422 	sd.sd_p = !ldt_info.seg_not_present;
423 	sd.sd_def32 = ldt_info.seg_32bit;
424 	sd.sd_gran = ldt_info.limit_in_pages;
425 
426 	sl.start = ldt_info.entry_number;
427 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
428 	sl.num = 1;
429 
430 #if 0
431 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
432 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
433 #endif
434 
435 	parms = stackgap_alloc(&sg, sizeof(sl));
436 
437 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
438 		return (error);
439 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
440 		return (error);
441 
442 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
443 		return (error);
444 
445 	*retval = 0;
446 	return (0);
447 }
448 
449 #endif /* USER_LDT */
450 
451 int
452 linux_sys_modify_ldt(p, v, retval)
453 	struct proc *p;
454 	void *v;
455 	register_t *retval;
456 {
457 	struct linux_sys_modify_ldt_args /* {
458 		syscallarg(int) func;
459 		syscallarg(void *) ptr;
460 		syscallarg(size_t) bytecount;
461 	} */ *uap = v;
462 
463 	switch (SCARG(uap, func)) {
464 #ifdef USER_LDT
465 	case 0:
466 		return (linux_read_ldt(p, uap, retval));
467 
468 	case 1:
469 		return (linux_write_ldt(p, uap, retval));
470 #endif /* USER_LDT */
471 
472 	default:
473 		return (ENOSYS);
474 	}
475 }
476 
/*
 * XXX Pathetic hack to make svgalib work.  When wscons is configured, a
 * device whose major is NETBSD_WSCONS_MAJOR is reported with major
 * LINUX_CONS_MAJOR and its minor number plus one, so that svgalib
 * likes it; any other device number is returned unchanged.
 * Should probably do it 'wrong the right way' and use a mapping
 * array for all major device numbers, and map linux_mknod too.
 */
dev_t
linux_fakedev(dev_t dev)
{
#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		dev = makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif
	return dev;
}
493 
#if (NWSDISPLAY > 0)
/*
 * Fixed key tables returned by the LINUX_KDGKBENT ioctl.
 * That's not complete, but enough to get an X server running.
 */
#define NR_KEYS 128

/* Table 0: no modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 1: shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Tables 2 and 3: altgr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 4: control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Indexed by kb_table in the KDGKBENT handler.  altgr_map deliberately
 * occupies two consecutive slots, exactly as in the original table.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
573 
574 static struct biosdisk_info *
575 fd2biosinfo(p, fp)
576 	struct proc *p;
577 	struct file *fp;
578 {
579 	struct vnode *vp;
580 	const char *blkname;
581 	char diskname[16];
582 	int i;
583 	struct nativedisk_info *nip;
584 	struct disklist *dl = i386_alldisks;
585 
586 	if (fp->f_type != DTYPE_VNODE)
587 		return NULL;
588 	vp = (struct vnode *)fp->f_data;
589 
590 	if (vp->v_type != VBLK)
591 		return NULL;
592 
593 	blkname = findblkname(major(vp->v_rdev));
594 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
595 	    DISKUNIT(vp->v_rdev));
596 
597 	for (i = 0; i < dl->dl_nnativedisks; i++) {
598 		nip = &dl->dl_nativedisks[i];
599 		if (strcmp(diskname, nip->ni_devname))
600 			continue;
601 		if (nip->ni_nmatches != 0)
602 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
603 	}
604 
605 	return NULL;
606 }
607 
608 
609 /*
610  * We come here in a last attempt to satisfy a Linux ioctl() call
611  */
612 int
613 linux_machdepioctl(p, v, retval)
614 	struct proc *p;
615 	void *v;
616 	register_t *retval;
617 {
618 	struct linux_sys_ioctl_args /* {
619 		syscallarg(int) fd;
620 		syscallarg(u_long) com;
621 		syscallarg(caddr_t) data;
622 	} */ *uap = v;
623 	struct sys_ioctl_args bia;
624 	u_long com;
625 	int error, error1;
626 #if (NWSDISPLAY > 0)
627 	struct vt_mode lvt;
628 	caddr_t bvtp, sg;
629 	struct kbentry kbe;
630 #endif
631 	struct linux_hd_geometry hdg;
632 	struct linux_hd_big_geometry hdg_big;
633 	struct biosdisk_info *bip;
634 	struct filedesc *fdp;
635 	struct file *fp;
636 	int fd;
637 	struct disklabel label, *labp;
638 	struct partinfo partp;
639 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
640 	u_long start, biostotal, realtotal;
641 	u_char heads, sectors;
642 	u_int cylinders;
643 	struct ioctl_pt pt;
644 
645 	fd = SCARG(uap, fd);
646 	SCARG(&bia, fd) = fd;
647 	SCARG(&bia, data) = SCARG(uap, data);
648 	com = SCARG(uap, com);
649 
650 	fdp = p->p_fd;
651 
652 	if ((fp = fd_getfile(fdp, fd)) == NULL)
653 		return (EBADF);
654 
655 	switch (com) {
656 #if (NWSDISPLAY > 0)
657 	case LINUX_KDGKBMODE:
658 		com = KDGKBMODE;
659 		break;
660 	case LINUX_KDSKBMODE:
661 		com = KDSKBMODE;
662 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
663 			SCARG(&bia, data) = (caddr_t)K_RAW;
664 		break;
665 	case LINUX_KIOCSOUND:
666 		SCARG(&bia, data) =
667 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
668 		/* fall through */
669 	case LINUX_KDMKTONE:
670 		com = KDMKTONE;
671 		break;
672 	case LINUX_KDSETMODE:
673 		com = KDSETMODE;
674 		break;
675 	case LINUX_KDGETMODE:
676 		/* KD_* values are equal to the wscons numbers */
677 		com = WSDISPLAYIO_GMODE;
678 		break;
679 	case LINUX_KDENABIO:
680 		com = KDENABIO;
681 		break;
682 	case LINUX_KDDISABIO:
683 		com = KDDISABIO;
684 		break;
685 	case LINUX_KDGETLED:
686 		com = KDGETLED;
687 		break;
688 	case LINUX_KDSETLED:
689 		com = KDSETLED;
690 		break;
691 	case LINUX_VT_OPENQRY:
692 		com = VT_OPENQRY;
693 		break;
694 	case LINUX_VT_GETMODE:
695 		SCARG(&bia, com) = VT_GETMODE;
696 		if ((error = sys_ioctl(p, &bia, retval)))
697 			return error;
698 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
699 		    sizeof (struct vt_mode))))
700 			return error;
701 		lvt.relsig = native_to_linux_sig[lvt.relsig];
702 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
703 		lvt.frsig = native_to_linux_sig[lvt.frsig];
704 		return copyout((caddr_t)&lvt, SCARG(uap, data),
705 		    sizeof (struct vt_mode));
706 	case LINUX_VT_SETMODE:
707 		com = VT_SETMODE;
708 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
709 		    sizeof (struct vt_mode))))
710 			return error;
711 		lvt.relsig = linux_to_native_sig[lvt.relsig];
712 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
713 		lvt.frsig = linux_to_native_sig[lvt.frsig];
714 		sg = stackgap_init(p->p_emul);
715 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
716 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
717 			return error;
718 		SCARG(&bia, data) = bvtp;
719 		break;
720 	case LINUX_VT_DISALLOCATE:
721 		/* XXX should use WSDISPLAYIO_DELSCREEN */
722 		return 0;
723 	case LINUX_VT_RELDISP:
724 		com = VT_RELDISP;
725 		break;
726 	case LINUX_VT_ACTIVATE:
727 		com = VT_ACTIVATE;
728 		break;
729 	case LINUX_VT_WAITACTIVE:
730 		com = VT_WAITACTIVE;
731 		break;
732 	case LINUX_VT_GETSTATE:
733 		com = VT_GETSTATE;
734 		break;
735 	case LINUX_KDGKBTYPE:
736 		/* This is what Linux does. */
737 		return (subyte(SCARG(uap, data), KB_101));
738 	case LINUX_KDGKBENT:
739 		/*
740 		 * The Linux KDGKBENT ioctl is different from the
741 		 * SYSV original. So we handle it in machdep code.
742 		 * XXX We should use keyboard mapping information
743 		 * from wsdisplay, but this would be expensive.
744 		 */
745 		if ((error = copyin(SCARG(uap, data), &kbe,
746 				    sizeof(struct kbentry))))
747 			return (error);
748 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
749 		    || kbe.kb_index >= NR_KEYS)
750 			return (EINVAL);
751 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
752 		return (copyout(&kbe, SCARG(uap, data),
753 				sizeof(struct kbentry)));
754 #endif
755 	case LINUX_HDIO_GETGEO:
756 	case LINUX_HDIO_GETGEO_BIG:
757 		/*
758 		 * Try to mimic Linux behaviour: return the BIOS geometry
759 		 * if possible (extending its # of cylinders if it's beyond
760 		 * the 1023 limit), fall back to the MI geometry (i.e.
761 		 * the real geometry) if not found, by returning an
762 		 * error. See common/linux_hdio.c
763 		 */
764 		FILE_USE(fp);
765 		bip = fd2biosinfo(p, fp);
766 		ioctlf = fp->f_ops->fo_ioctl;
767 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
768 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
769 		FILE_UNUSE(fp, p);
770 		if (error != 0 && error1 != 0)
771 			return error1;
772 		labp = error != 0 ? &label : partp.disklab;
773 		start = error1 != 0 ? partp.part->p_offset : 0;
774 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
775 		    && bip->bi_cyl != 0) {
776 			heads = bip->bi_head;
777 			sectors = bip->bi_sec;
778 			cylinders = bip->bi_cyl;
779 			biostotal = heads * sectors * cylinders;
780 			realtotal = labp->d_ntracks * labp->d_nsectors *
781 			    labp->d_ncylinders;
782 			if (realtotal > biostotal)
783 				cylinders = realtotal / (heads * sectors);
784 		} else {
785 			heads = labp->d_ntracks;
786 			cylinders = labp->d_ncylinders;
787 			sectors = labp->d_nsectors;
788 		}
789 		if (com == LINUX_HDIO_GETGEO) {
790 			hdg.start = start;
791 			hdg.heads = heads;
792 			hdg.cylinders = cylinders;
793 			hdg.sectors = sectors;
794 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
795 		} else {
796 			hdg_big.start = start;
797 			hdg_big.heads = heads;
798 			hdg_big.cylinders = cylinders;
799 			hdg_big.sectors = sectors;
800 			return copyout(&hdg_big, SCARG(uap, data),
801 			    sizeof hdg_big);
802 		}
803 		return 0;
804 
805 	default:
806 		/*
807 		 * Unknown to us. If it's on a device, just pass it through
808 		 * using PTIOCLINUX, the device itself might be able to
809 		 * make some sense of it.
810 		 * XXX hack: if the function returns EJUSTRETURN,
811 		 * it has stuffed a sysctl return value in pt.data.
812 		 */
813 		FILE_USE(fp);
814 		ioctlf = fp->f_ops->fo_ioctl;
815 		pt.com = SCARG(uap, com);
816 		pt.data = SCARG(uap, data);
817 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
818 		FILE_UNUSE(fp, p);
819 		if (error == EJUSTRETURN) {
820 			retval[0] = (register_t)pt.data;
821 			error = 0;
822 		}
823 
824 		if (error == ENOTTY)
825 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
826 			    com);
827 		return error;
828 	}
829 	SCARG(&bia, com) = com;
830 	return sys_ioctl(p, &bia, retval);
831 }
832 
833 /*
834  * Set I/O permissions for a process. Just set the maximum level
835  * right away (ignoring the argument), otherwise we would have
836  * to rely on I/O permission maps, which are not implemented.
837  */
838 int
839 linux_sys_iopl(p, v, retval)
840 	struct proc *p;
841 	void *v;
842 	register_t *retval;
843 {
844 #if 0
845 	struct linux_sys_iopl_args /* {
846 		syscallarg(int) level;
847 	} */ *uap = v;
848 #endif
849 	struct trapframe *fp = p->p_md.md_regs;
850 
851 	if (suser(p->p_ucred, &p->p_acflag) != 0)
852 		return EPERM;
853 	fp->tf_eflags |= PSL_IOPL;
854 	*retval = 0;
855 	return 0;
856 }
857 
858 /*
859  * See above. If a root process tries to set access to an I/O port,
860  * just let it have the whole range.
861  */
862 int
863 linux_sys_ioperm(p, v, retval)
864 	struct proc *p;
865 	void *v;
866 	register_t *retval;
867 {
868 	struct linux_sys_ioperm_args /* {
869 		syscallarg(unsigned int) lo;
870 		syscallarg(unsigned int) hi;
871 		syscallarg(int) val;
872 	} */ *uap = v;
873 	struct trapframe *fp = p->p_md.md_regs;
874 
875 	if (suser(p->p_ucred, &p->p_acflag) != 0)
876 		return EPERM;
877 	if (SCARG(uap, val))
878 		fp->tf_eflags |= PSL_IOPL;
879 	*retval = 0;
880 	return 0;
881 }
882