xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 27578b9aac214cc7796ead81dcc5427e79d5f2a0)
1 /*	$NetBSD: linux_machdep.c,v 1.67 2001/08/02 22:04:57 thorpej Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #if defined(_KERNEL_OPT)
40 #include "opt_vm86.h"
41 #include "opt_user_ldt.h"
42 #endif
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/signalvar.h>
47 #include <sys/kernel.h>
48 #include <sys/map.h>
49 #include <sys/proc.h>
50 #include <sys/user.h>
51 #include <sys/buf.h>
52 #include <sys/reboot.h>
53 #include <sys/conf.h>
54 #include <sys/exec.h>
55 #include <sys/file.h>
56 #include <sys/callout.h>
57 #include <sys/malloc.h>
58 #include <sys/mbuf.h>
59 #include <sys/msgbuf.h>
60 #include <sys/mount.h>
61 #include <sys/vnode.h>
62 #include <sys/device.h>
63 #include <sys/syscallargs.h>
64 #include <sys/filedesc.h>
65 #include <sys/exec_elf.h>
66 #include <sys/disklabel.h>
67 #include <sys/ioctl.h>
68 #include <miscfs/specfs/specdev.h>
69 
70 #include <compat/linux/common/linux_types.h>
71 #include <compat/linux/common/linux_signal.h>
72 #include <compat/linux/common/linux_util.h>
73 #include <compat/linux/common/linux_ioctl.h>
74 #include <compat/linux/common/linux_hdio.h>
75 #include <compat/linux/common/linux_exec.h>
76 #include <compat/linux/common/linux_machdep.h>
77 
78 #include <compat/linux/linux_syscallargs.h>
79 
80 #include <machine/cpu.h>
81 #include <machine/cpufunc.h>
82 #include <machine/psl.h>
83 #include <machine/reg.h>
84 #include <machine/segments.h>
85 #include <machine/specialreg.h>
86 #include <machine/sysarch.h>
87 #include <machine/vm86.h>
88 #include <machine/vmparam.h>
89 
90 /*
91  * To see whether wscons is configured (for virtual console ioctl calls).
92  */
93 #if defined(_KERNEL_OPT)
94 #include "wsdisplay.h"
95 #endif
96 #if (NWSDISPLAY > 0)
97 #include <dev/wscons/wsconsio.h>
98 #include <dev/wscons/wsdisplay_usl_io.h>
99 #if defined(_KERNEL_OPT)
100 #include "opt_xserver.h"
101 #endif
102 #endif
103 
104 #ifdef USER_LDT
105 #include <machine/cpu.h>
106 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
107     register_t *));
108 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
109     register_t *));
110 #endif
111 
112 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
113 extern struct disklist *i386_alldisks;
114 extern const char *findblkname __P((int));
115 
116 /*
117  * Deal with some i386-specific things in the Linux emulation code.
118  */
119 
120 void
121 linux_setregs(p, epp, stack)
122 	struct proc *p;
123 	struct exec_package *epp;
124 	u_long stack;
125 {
126 	struct pcb *pcb = &p->p_addr->u_pcb;
127 
128 	setregs(p, epp, stack);
129 	if (i386_use_fxsave)
130 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
131 	else
132 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
133 }
134 
135 /*
136  * Send an interrupt to process.
137  *
138  * Stack is set up to allow sigcode stored
139  * in u. to call routine, followed by kcall
140  * to sigreturn routine below.  After sigreturn
141  * resets the signal mask, the stack, and the
142  * frame pointer, it returns to the user
143  * specified pc, psl.
144  */
145 
146 void
147 linux_sendsig(catcher, sig, mask, code)
148 	sig_t catcher;
149 	int sig;
150 	sigset_t *mask;
151 	u_long code;
152 {
153 	struct proc *p = curproc;
154 	struct trapframe *tf;
155 	struct linux_sigframe *fp, frame;
156 	int onstack;
157 
158 	tf = p->p_md.md_regs;
159 
160 	/* Do we need to jump onto the signal stack? */
161 	onstack =
162 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
163 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
164 
165 	/* Allocate space for the signal handler context. */
166 	if (onstack)
167 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
168 					  p->p_sigctx.ps_sigstk.ss_size);
169 	else
170 		fp = (struct linux_sigframe *)tf->tf_esp;
171 	fp--;
172 
173 	/* Build stack frame for signal trampoline. */
174 	frame.sf_handler = catcher;
175 	frame.sf_sig = native_to_linux_sig[sig];
176 
177 	/* Save register context. */
178 #ifdef VM86
179 	if (tf->tf_eflags & PSL_VM) {
180 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
181 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
182 		frame.sf_sc.sc_es = tf->tf_vm86_es;
183 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
184 		frame.sf_sc.sc_eflags = get_vflags(p);
185 	} else
186 #endif
187 	{
188 		frame.sf_sc.sc_gs = tf->tf_gs;
189 		frame.sf_sc.sc_fs = tf->tf_fs;
190 		frame.sf_sc.sc_es = tf->tf_es;
191 		frame.sf_sc.sc_ds = tf->tf_ds;
192 		frame.sf_sc.sc_eflags = tf->tf_eflags;
193 	}
194 	frame.sf_sc.sc_edi = tf->tf_edi;
195 	frame.sf_sc.sc_esi = tf->tf_esi;
196 	frame.sf_sc.sc_ebp = tf->tf_ebp;
197 	frame.sf_sc.sc_ebx = tf->tf_ebx;
198 	frame.sf_sc.sc_edx = tf->tf_edx;
199 	frame.sf_sc.sc_ecx = tf->tf_ecx;
200 	frame.sf_sc.sc_eax = tf->tf_eax;
201 	frame.sf_sc.sc_eip = tf->tf_eip;
202 	frame.sf_sc.sc_cs = tf->tf_cs;
203 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
204 	frame.sf_sc.sc_ss = tf->tf_ss;
205 	frame.sf_sc.sc_err = tf->tf_err;
206 	frame.sf_sc.sc_trapno = tf->tf_trapno;
207 
208 	/* Save signal stack. */
209 	/* Linux doesn't save the onstack flag in sigframe */
210 
211 	/* Save signal mask. */
212 	native_to_linux_old_sigset(mask, &frame.sf_sc.sc_mask);
213 
214 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
215 		/*
216 		 * Process has trashed its stack; give it an illegal
217 		 * instruction to halt it in its tracks.
218 		 */
219 		sigexit(p, SIGILL);
220 		/* NOTREACHED */
221 	}
222 
223 	/*
224 	 * Build context to run handler in.
225 	 */
226 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
227 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
228 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
229 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
230 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
231 	tf->tf_esp = (int)fp;
232 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
233 
234 	/* Remember that we're now on the signal stack. */
235 	if (onstack)
236 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
237 }
238 
239 /*
240  * System call to cleanup state after a signal
241  * has been taken.  Reset signal mask and
242  * stack state from context left by sendsig (above).
243  * Return to previous pc and psl as specified by
244  * context left by sendsig. Check carefully to
245  * make sure that the user has not modified the
246  * psl to gain improper privileges or to cause
247  * a machine fault.
248  */
249 int
250 linux_sys_rt_sigreturn(p, v, retval)
251 	struct proc *p;
252 	void *v;
253 	register_t *retval;
254 {
255 	/* XXX XAX write me */
256 	return(ENOSYS);
257 }
258 
259 int
260 linux_sys_sigreturn(p, v, retval)
261 	struct proc *p;
262 	void *v;
263 	register_t *retval;
264 {
265 	struct linux_sys_sigreturn_args /* {
266 		syscallarg(struct linux_sigcontext *) scp;
267 	} */ *uap = v;
268 	struct linux_sigcontext *scp, context;
269 	struct trapframe *tf;
270 	sigset_t mask;
271 	ssize_t ss_gap;
272 
273 	/*
274 	 * The trampoline code hands us the context.
275 	 * It is unsafe to keep track of it ourselves, in the event that a
276 	 * program jumps out of a signal handler.
277 	 */
278 	scp = SCARG(uap, scp);
279 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
280 		return (EFAULT);
281 
282 	/* Restore register context. */
283 	tf = p->p_md.md_regs;
284 #ifdef VM86
285 	if (context.sc_eflags & PSL_VM) {
286 		tf->tf_vm86_gs = context.sc_gs;
287 		tf->tf_vm86_fs = context.sc_fs;
288 		tf->tf_vm86_es = context.sc_es;
289 		tf->tf_vm86_ds = context.sc_ds;
290 		set_vflags(p, context.sc_eflags);
291 	} else
292 #endif
293 	{
294 		/*
295 		 * Check for security violations.  If we're returning to
296 		 * protected mode, the CPU will validate the segment registers
297 		 * automatically and generate a trap on violations.  We handle
298 		 * the trap, rather than doing all of the checking here.
299 		 */
300 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
301 		    !USERMODE(context.sc_cs, context.sc_eflags))
302 			return (EINVAL);
303 
304 		/* %fs and %gs were restored by the trampoline. */
305 		tf->tf_es = context.sc_es;
306 		tf->tf_ds = context.sc_ds;
307 		tf->tf_eflags = context.sc_eflags;
308 	}
309 	tf->tf_edi = context.sc_edi;
310 	tf->tf_esi = context.sc_esi;
311 	tf->tf_ebp = context.sc_ebp;
312 	tf->tf_ebx = context.sc_ebx;
313 	tf->tf_edx = context.sc_edx;
314 	tf->tf_ecx = context.sc_ecx;
315 	tf->tf_eax = context.sc_eax;
316 	tf->tf_eip = context.sc_eip;
317 	tf->tf_cs = context.sc_cs;
318 	tf->tf_esp = context.sc_esp_at_signal;
319 	tf->tf_ss = context.sc_ss;
320 
321 	/* Restore signal stack. */
322 	/*
323 	 * Linux really does it this way; it doesn't have space in sigframe
324 	 * to save the onstack flag.
325 	 */
326 	ss_gap = (ssize_t)
327 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
328 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
329 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
330 	else
331 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
332 
333 	/* Restore signal mask. */
334 	linux_old_to_native_sigset(&context.sc_mask, &mask);
335 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
336 
337 	return (EJUSTRETURN);
338 }
339 
340 #ifdef USER_LDT
341 
342 int
343 linux_read_ldt(p, uap, retval)
344 	struct proc *p;
345 	struct linux_sys_modify_ldt_args /* {
346 		syscallarg(int) func;
347 		syscallarg(void *) ptr;
348 		syscallarg(size_t) bytecount;
349 	} */ *uap;
350 	register_t *retval;
351 {
352 	struct i386_get_ldt_args gl;
353 	int error;
354 	caddr_t sg;
355 	char *parms;
356 
357 	sg = stackgap_init(p->p_emul);
358 
359 	gl.start = 0;
360 	gl.desc = SCARG(uap, ptr);
361 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
362 
363 	parms = stackgap_alloc(&sg, sizeof(gl));
364 
365 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
366 		return (error);
367 
368 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
369 		return (error);
370 
371 	*retval *= sizeof(union descriptor);
372 	return (0);
373 }
374 
/*
 * Argument block a Linux process passes to modify_ldt() when writing an
 * LDT entry.  linux_write_ldt() copies it in from userland (the
 * supplied byte count must equal sizeof(struct linux_ldt_info)) and
 * converts it into a native segment descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;	/* LDT slot to set (becomes sl.start) */
	u_long base_addr;	/* segment base; split into sd_lobase/sd_hibase */
	u_int limit;		/* segment limit; split into sd_lolimit/sd_hilimit */
	u_int seg_32bit:1;	/* copied to sd_def32 */
	u_int contents:2;	/* descriptor type bits 2-3; value 3 is rejected */
	u_int read_exec_only:1;	/* when clear, type bit 1 is set */
	u_int limit_in_pages:1;	/* copied to sd_gran */
	u_int seg_not_present:1; /* inverted into sd_p */
};
385 
386 int
387 linux_write_ldt(p, uap, retval)
388 	struct proc *p;
389 	struct linux_sys_modify_ldt_args /* {
390 		syscallarg(int) func;
391 		syscallarg(void *) ptr;
392 		syscallarg(size_t) bytecount;
393 	} */ *uap;
394 	register_t *retval;
395 {
396 	struct linux_ldt_info ldt_info;
397 	struct segment_descriptor sd;
398 	struct i386_set_ldt_args sl;
399 	int error;
400 	caddr_t sg;
401 	char *parms;
402 
403 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
404 		return (EINVAL);
405 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
406 		return error;
407 	if (ldt_info.contents == 3)
408 		return (EINVAL);
409 
410 	sg = stackgap_init(p->p_emul);
411 
412 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
413 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
414 	sd.sd_lolimit = ldt_info.limit & 0xffff;
415 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
416 	sd.sd_type =
417 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
418 	sd.sd_dpl = SEL_UPL;
419 	sd.sd_p = !ldt_info.seg_not_present;
420 	sd.sd_def32 = ldt_info.seg_32bit;
421 	sd.sd_gran = ldt_info.limit_in_pages;
422 
423 	sl.start = ldt_info.entry_number;
424 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
425 	sl.num = 1;
426 
427 #if 0
428 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
429 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
430 #endif
431 
432 	parms = stackgap_alloc(&sg, sizeof(sl));
433 
434 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
435 		return (error);
436 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
437 		return (error);
438 
439 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
440 		return (error);
441 
442 	*retval = 0;
443 	return (0);
444 }
445 
446 #endif /* USER_LDT */
447 
448 int
449 linux_sys_modify_ldt(p, v, retval)
450 	struct proc *p;
451 	void *v;
452 	register_t *retval;
453 {
454 	struct linux_sys_modify_ldt_args /* {
455 		syscallarg(int) func;
456 		syscallarg(void *) ptr;
457 		syscallarg(size_t) bytecount;
458 	} */ *uap = v;
459 
460 	switch (SCARG(uap, func)) {
461 #ifdef USER_LDT
462 	case 0:
463 		return (linux_read_ldt(p, uap, retval));
464 
465 	case 1:
466 		return (linux_write_ldt(p, uap, retval));
467 #endif /* USER_LDT */
468 
469 	default:
470 		return (ENOSYS);
471 	}
472 }
473 
474 /*
475  * XXX Pathetic hack to make svgalib work. This will fake the major
476  * device number of an opened VT so that svgalib likes it. grmbl.
477  * Should probably do it 'wrong the right way' and use a mapping
478  * array for all major device numbers, and map linux_mknod too.
479  */
480 dev_t
481 linux_fakedev(dev)
482 	dev_t dev;
483 {
484 #if (NWSDISPLAY > 0)
485 	if (major(dev) == NETBSD_WSCONS_MAJOR)
486 		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
487 #endif
488 	return dev;
489 }
490 
491 #if (NWSDISPLAY > 0)
492 /*
493  * That's not complete, but enough to get an X server running.
494  */
/* Number of entries in each of the key translation tables below. */
#define NR_KEYS 128
/*
 * Fixed key tables handed back by the LINUX_KDGKBENT ioctl in
 * linux_machdepioctl(): the caller's kb_index selects an entry and the
 * 16-bit value found there is returned in kb_value.  One table each for
 * the unmodified, shift, altgr and ctrl states.
 * NOTE(review): the values presumably follow the Linux keymap encoding
 * (type in the high byte, value in the low byte) -- confirm against the
 * Linux keyboard headers before editing any entry.
 */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {	/* shift state */
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {	/* altgr state */
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {	/* ctrl state */
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
565 
/*
 * Key tables indexed by the kb_table value of a LINUX_KDGKBENT request;
 * linux_machdepioctl() derives the valid kb_table range from the size
 * of this array.  Slot 3 repeats altgr_map.
 * NOTE(review): slot 3 presumably stands in for the shift+altgr table,
 * for which no separate map exists here -- confirm against the Linux
 * keymap index order.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
569 #endif
570 
571 static struct biosdisk_info *
572 fd2biosinfo(p, fp)
573 	struct proc *p;
574 	struct file *fp;
575 {
576 	struct vnode *vp;
577 	const char *blkname;
578 	char diskname[16];
579 	int i;
580 	struct nativedisk_info *nip;
581 	struct disklist *dl = i386_alldisks;
582 
583 	if (fp->f_type != DTYPE_VNODE)
584 		return NULL;
585 	vp = (struct vnode *)fp->f_data;
586 
587 	if (vp->v_type != VBLK)
588 		return NULL;
589 
590 	blkname = findblkname(major(vp->v_rdev));
591 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
592 	    DISKUNIT(vp->v_rdev));
593 
594 	for (i = 0; i < dl->dl_nnativedisks; i++) {
595 		nip = &dl->dl_nativedisks[i];
596 		if (strcmp(diskname, nip->ni_devname))
597 			continue;
598 		if (nip->ni_nmatches != 0)
599 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
600 	}
601 
602 	return NULL;
603 }
604 
605 
606 /*
607  * We come here in a last attempt to satisfy a Linux ioctl() call
608  */
609 int
610 linux_machdepioctl(p, v, retval)
611 	struct proc *p;
612 	void *v;
613 	register_t *retval;
614 {
615 	struct linux_sys_ioctl_args /* {
616 		syscallarg(int) fd;
617 		syscallarg(u_long) com;
618 		syscallarg(caddr_t) data;
619 	} */ *uap = v;
620 	struct sys_ioctl_args bia;
621 	u_long com;
622 	int error, error1;
623 #if (NWSDISPLAY > 0)
624 	struct vt_mode lvt;
625 	caddr_t bvtp, sg;
626 	struct kbentry kbe;
627 #endif
628 	struct linux_hd_geometry hdg;
629 	struct linux_hd_big_geometry hdg_big;
630 	struct biosdisk_info *bip;
631 	struct filedesc *fdp;
632 	struct file *fp;
633 	int fd;
634 	struct disklabel label, *labp;
635 	struct partinfo partp;
636 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
637 	u_long start, biostotal, realtotal;
638 	u_char heads, sectors;
639 	u_int cylinders;
640 	struct ioctl_pt pt;
641 
642 	fd = SCARG(uap, fd);
643 	SCARG(&bia, fd) = fd;
644 	SCARG(&bia, data) = SCARG(uap, data);
645 	com = SCARG(uap, com);
646 
647 	fdp = p->p_fd;
648 
649 	if ((fp = fd_getfile(fdp, fd)) == NULL)
650 		return (EBADF);
651 
652 	switch (com) {
653 #if (NWSDISPLAY > 0)
654 	case LINUX_KDGKBMODE:
655 		com = KDGKBMODE;
656 		break;
657 	case LINUX_KDSKBMODE:
658 		com = KDSKBMODE;
659 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
660 			SCARG(&bia, data) = (caddr_t)K_RAW;
661 		break;
662 	case LINUX_KIOCSOUND:
663 		SCARG(&bia, data) =
664 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
665 		/* fall through */
666 	case LINUX_KDMKTONE:
667 		com = KDMKTONE;
668 		break;
669 	case LINUX_KDSETMODE:
670 		com = KDSETMODE;
671 		break;
672 	case LINUX_KDGETMODE:
673 		/* KD_* values are equal to the wscons numbers */
674 		com = WSDISPLAYIO_GMODE;
675 		break;
676 	case LINUX_KDENABIO:
677 		com = KDENABIO;
678 		break;
679 	case LINUX_KDDISABIO:
680 		com = KDDISABIO;
681 		break;
682 	case LINUX_KDGETLED:
683 		com = KDGETLED;
684 		break;
685 	case LINUX_KDSETLED:
686 		com = KDSETLED;
687 		break;
688 	case LINUX_VT_OPENQRY:
689 		com = VT_OPENQRY;
690 		break;
691 	case LINUX_VT_GETMODE:
692 		SCARG(&bia, com) = VT_GETMODE;
693 		if ((error = sys_ioctl(p, &bia, retval)))
694 			return error;
695 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
696 		    sizeof (struct vt_mode))))
697 			return error;
698 		lvt.relsig = native_to_linux_sig[lvt.relsig];
699 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
700 		lvt.frsig = native_to_linux_sig[lvt.frsig];
701 		return copyout((caddr_t)&lvt, SCARG(uap, data),
702 		    sizeof (struct vt_mode));
703 	case LINUX_VT_SETMODE:
704 		com = VT_SETMODE;
705 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
706 		    sizeof (struct vt_mode))))
707 			return error;
708 		lvt.relsig = linux_to_native_sig[lvt.relsig];
709 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
710 		lvt.frsig = linux_to_native_sig[lvt.frsig];
711 		sg = stackgap_init(p->p_emul);
712 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
713 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
714 			return error;
715 		SCARG(&bia, data) = bvtp;
716 		break;
717 	case LINUX_VT_DISALLOCATE:
718 		/* XXX should use WSDISPLAYIO_DELSCREEN */
719 		return 0;
720 	case LINUX_VT_RELDISP:
721 		com = VT_RELDISP;
722 		break;
723 	case LINUX_VT_ACTIVATE:
724 		com = VT_ACTIVATE;
725 		break;
726 	case LINUX_VT_WAITACTIVE:
727 		com = VT_WAITACTIVE;
728 		break;
729 	case LINUX_VT_GETSTATE:
730 		com = VT_GETSTATE;
731 		break;
732 	case LINUX_KDGKBTYPE:
733 		/* This is what Linux does. */
734 		return (subyte(SCARG(uap, data), KB_101));
735 	case LINUX_KDGKBENT:
736 		/*
737 		 * The Linux KDGKBENT ioctl is different from the
738 		 * SYSV original. So we handle it in machdep code.
739 		 * XXX We should use keyboard mapping information
740 		 * from wsdisplay, but this would be expensive.
741 		 */
742 		if ((error = copyin(SCARG(uap, data), &kbe,
743 				    sizeof(struct kbentry))))
744 			return (error);
745 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
746 		    || kbe.kb_index >= NR_KEYS)
747 			return (EINVAL);
748 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
749 		return (copyout(&kbe, SCARG(uap, data),
750 				sizeof(struct kbentry)));
751 #endif
752 	case LINUX_HDIO_GETGEO:
753 	case LINUX_HDIO_GETGEO_BIG:
754 		/*
755 		 * Try to mimic Linux behaviour: return the BIOS geometry
756 		 * if possible (extending its # of cylinders if it's beyond
757 		 * the 1023 limit), fall back to the MI geometry (i.e.
758 		 * the real geometry) if not found, by returning an
759 		 * error. See common/linux_hdio.c
760 		 */
761 		FILE_USE(fp);
762 		bip = fd2biosinfo(p, fp);
763 		ioctlf = fp->f_ops->fo_ioctl;
764 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
765 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
766 		FILE_UNUSE(fp, p);
767 		if (error != 0 && error1 != 0)
768 			return error1;
769 		labp = error != 0 ? &label : partp.disklab;
770 		start = error1 != 0 ? partp.part->p_offset : 0;
771 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
772 		    && bip->bi_cyl != 0) {
773 			heads = bip->bi_head;
774 			sectors = bip->bi_sec;
775 			cylinders = bip->bi_cyl;
776 			biostotal = heads * sectors * cylinders;
777 			realtotal = labp->d_ntracks * labp->d_nsectors *
778 			    labp->d_ncylinders;
779 			if (realtotal > biostotal)
780 				cylinders = realtotal / (heads * sectors);
781 		} else {
782 			heads = labp->d_ntracks;
783 			cylinders = labp->d_ncylinders;
784 			sectors = labp->d_nsectors;
785 		}
786 		if (com == LINUX_HDIO_GETGEO) {
787 			hdg.start = start;
788 			hdg.heads = heads;
789 			hdg.cylinders = cylinders;
790 			hdg.sectors = sectors;
791 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
792 		} else {
793 			hdg_big.start = start;
794 			hdg_big.heads = heads;
795 			hdg_big.cylinders = cylinders;
796 			hdg_big.sectors = sectors;
797 			return copyout(&hdg_big, SCARG(uap, data),
798 			    sizeof hdg_big);
799 		}
800 		return 0;
801 
802 	default:
803 		/*
804 		 * Unknown to us. If it's on a device, just pass it through
805 		 * using PTIOCLINUX, the device itself might be able to
806 		 * make some sense of it.
807 		 * XXX hack: if the function returns EJUSTRETURN,
808 		 * it has stuffed a sysctl return value in pt.data.
809 		 */
810 		FILE_USE(fp);
811 		ioctlf = fp->f_ops->fo_ioctl;
812 		pt.com = SCARG(uap, com);
813 		pt.data = SCARG(uap, data);
814 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
815 		FILE_UNUSE(fp, p);
816 		if (error == EJUSTRETURN) {
817 			retval[0] = (register_t)pt.data;
818 			error = 0;
819 		}
820 
821 		if (error == ENOTTY)
822 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
823 			    com);
824 		return error;
825 	}
826 	SCARG(&bia, com) = com;
827 	return sys_ioctl(p, &bia, retval);
828 }
829 
830 /*
831  * Set I/O permissions for a process. Just set the maximum level
832  * right away (ignoring the argument), otherwise we would have
833  * to rely on I/O permission maps, which are not implemented.
834  */
835 int
836 linux_sys_iopl(p, v, retval)
837 	struct proc *p;
838 	void *v;
839 	register_t *retval;
840 {
841 #if 0
842 	struct linux_sys_iopl_args /* {
843 		syscallarg(int) level;
844 	} */ *uap = v;
845 #endif
846 	struct trapframe *fp = p->p_md.md_regs;
847 
848 	if (suser(p->p_ucred, &p->p_acflag) != 0)
849 		return EPERM;
850 	fp->tf_eflags |= PSL_IOPL;
851 	*retval = 0;
852 	return 0;
853 }
854 
855 /*
856  * See above. If a root process tries to set access to an I/O port,
857  * just let it have the whole range.
858  */
859 int
860 linux_sys_ioperm(p, v, retval)
861 	struct proc *p;
862 	void *v;
863 	register_t *retval;
864 {
865 	struct linux_sys_ioperm_args /* {
866 		syscallarg(unsigned int) lo;
867 		syscallarg(unsigned int) hi;
868 		syscallarg(int) val;
869 	} */ *uap = v;
870 	struct trapframe *fp = p->p_md.md_regs;
871 
872 	if (suser(p->p_ucred, &p->p_acflag) != 0)
873 		return EPERM;
874 	if (SCARG(uap, val))
875 		fp->tf_eflags |= PSL_IOPL;
876 	*retval = 0;
877 	return 0;
878 }
879