xref: /netbsd-src/sys/compat/linux/arch/i386/linux_machdep.c (revision 37b34d511dea595d3ba03a661cf3b775038ea5f8)
1 /*	$NetBSD: linux_machdep.c,v 1.81 2002/10/09 05:07:55 junyoung Exp $	*/
2 
3 /*-
4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.81 2002/10/09 05:07:55 junyoung Exp $");
41 
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/syscallargs.h>
66 #include <sys/filedesc.h>
67 #include <sys/exec_elf.h>
68 #include <sys/disklabel.h>
69 #include <sys/ioctl.h>
70 #include <miscfs/specfs/specdev.h>
71 
72 #include <compat/linux/common/linux_types.h>
73 #include <compat/linux/common/linux_signal.h>
74 #include <compat/linux/common/linux_util.h>
75 #include <compat/linux/common/linux_ioctl.h>
76 #include <compat/linux/common/linux_hdio.h>
77 #include <compat/linux/common/linux_exec.h>
78 #include <compat/linux/common/linux_machdep.h>
79 
80 #include <compat/linux/linux_syscallargs.h>
81 
82 #include <machine/cpu.h>
83 #include <machine/cpufunc.h>
84 #include <machine/psl.h>
85 #include <machine/reg.h>
86 #include <machine/segments.h>
87 #include <machine/specialreg.h>
88 #include <machine/sysarch.h>
89 #include <machine/vm86.h>
90 #include <machine/vmparam.h>
91 
92 /*
93  * To see whether wscons is configured (for virtual console ioctl calls).
94  */
95 #if defined(_KERNEL_OPT)
96 #include "wsdisplay.h"
97 #endif
98 #if (NWSDISPLAY > 0)
99 #include <dev/wscons/wsconsio.h>
100 #include <dev/wscons/wsdisplay_usl_io.h>
101 #if defined(_KERNEL_OPT)
102 #include "opt_xserver.h"
103 #endif
104 #endif
105 
106 #ifdef USER_LDT
107 #include <machine/cpu.h>
108 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
109     register_t *));
110 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
111     register_t *));
112 #endif
113 
114 #ifdef DEBUG_LINUX
115 #define DPRINTF(a) uprintf a
116 #else
117 #define DPRINTF(a)
118 #endif
119 
120 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
121 extern struct disklist *i386_alldisks;
122 
123 /*
124  * Deal with some i386-specific things in the Linux emulation code.
125  */
126 
127 void
128 linux_setregs(p, epp, stack)
129 	struct proc *p;
130 	struct exec_package *epp;
131 	u_long stack;
132 {
133 	struct pcb *pcb = &p->p_addr->u_pcb;
134 	struct trapframe *tf;
135 
136 #if NNPX > 0
137 	/* If we were using the FPU, forget about it. */
138 	if (npxproc == p)
139 		npxdrop();
140 #endif
141 
142 #ifdef USER_LDT
143 	pmap_ldt_cleanup(p);
144 #endif
145 
146 	p->p_md.md_flags &= ~MDP_USEDFPU;
147 
148 	if (i386_use_fxsave) {
149 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
150 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
151 	} else
152 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
153 
154 	tf = p->p_md.md_regs;
155 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
156 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
157 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
158 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
159 	tf->tf_edi = 0;
160 	tf->tf_esi = 0;
161 	tf->tf_ebp = 0;
162 	tf->tf_ebx = (int)p->p_psstr;
163 	tf->tf_edx = 0;
164 	tf->tf_ecx = 0;
165 	tf->tf_eax = 0;
166 	tf->tf_eip = epp->ep_entry;
167 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
168 	tf->tf_eflags = PSL_USERSET;
169 	tf->tf_esp = stack;
170 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
171 }
172 
173 /*
174  * Send an interrupt to process.
175  *
176  * Stack is set up to allow sigcode stored
177  * in u. to call routine, followed by kcall
178  * to sigreturn routine below.  After sigreturn
179  * resets the signal mask, the stack, and the
180  * frame pointer, it returns to the user
181  * specified pc, psl.
182  */
183 
184 void
185 linux_sendsig(sig, mask, code)
186 	int sig;
187 	sigset_t *mask;
188 	u_long code;
189 {
190 	struct proc *p = curproc;
191 	struct trapframe *tf;
192 	struct linux_sigframe *fp, frame;
193 	int onstack;
194 	sig_t catcher = SIGACTION(p, sig).sa_handler;
195 
196 	tf = p->p_md.md_regs;
197 
198 	/* Do we need to jump onto the signal stack? */
199 	onstack =
200 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
201 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
202 
203 	/* Allocate space for the signal handler context. */
204 	if (onstack)
205 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
206 					  p->p_sigctx.ps_sigstk.ss_size);
207 	else
208 		fp = (struct linux_sigframe *)tf->tf_esp;
209 	fp--;
210 
211 	/* Build stack frame for signal trampoline. */
212 	frame.sf_handler = catcher;
213 	frame.sf_sig = native_to_linux_signo[sig];
214 
215 	/* Save register context. */
216 #ifdef VM86
217 	if (tf->tf_eflags & PSL_VM) {
218 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
219 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
220 		frame.sf_sc.sc_es = tf->tf_vm86_es;
221 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
222 		frame.sf_sc.sc_eflags = get_vflags(p);
223 	} else
224 #endif
225 	{
226 		frame.sf_sc.sc_gs = tf->tf_gs;
227 		frame.sf_sc.sc_fs = tf->tf_fs;
228 		frame.sf_sc.sc_es = tf->tf_es;
229 		frame.sf_sc.sc_ds = tf->tf_ds;
230 		frame.sf_sc.sc_eflags = tf->tf_eflags;
231 	}
232 	frame.sf_sc.sc_edi = tf->tf_edi;
233 	frame.sf_sc.sc_esi = tf->tf_esi;
234 	frame.sf_sc.sc_ebp = tf->tf_ebp;
235 	frame.sf_sc.sc_ebx = tf->tf_ebx;
236 	frame.sf_sc.sc_edx = tf->tf_edx;
237 	frame.sf_sc.sc_ecx = tf->tf_ecx;
238 	frame.sf_sc.sc_eax = tf->tf_eax;
239 	frame.sf_sc.sc_eip = tf->tf_eip;
240 	frame.sf_sc.sc_cs = tf->tf_cs;
241 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
242 	frame.sf_sc.sc_ss = tf->tf_ss;
243 	frame.sf_sc.sc_err = tf->tf_err;
244 	frame.sf_sc.sc_trapno = tf->tf_trapno;
245 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
246 
247 	/* Save signal stack. */
248 	/* Linux doesn't save the onstack flag in sigframe */
249 
250 	/* Save signal mask. */
251 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
252 
253 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
254 		/*
255 		 * Process has trashed its stack; give it an illegal
256 		 * instruction to halt it in its tracks.
257 		 */
258 		sigexit(p, SIGILL);
259 		/* NOTREACHED */
260 	}
261 
262 	/*
263 	 * Build context to run handler in.
264 	 */
265 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
266 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
267 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
268 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
269 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
270 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
271 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
272 	tf->tf_esp = (int)fp;
273 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
274 
275 	/* Remember that we're now on the signal stack. */
276 	if (onstack)
277 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
278 }
279 
280 /*
281  * System call to cleanup state after a signal
282  * has been taken.  Reset signal mask and
283  * stack state from context left by sendsig (above).
284  * Return to previous pc and psl as specified by
285  * context left by sendsig. Check carefully to
286  * make sure that the user has not modified the
287  * psl to gain improper privileges or to cause
288  * a machine fault.
289  */
290 int
291 linux_sys_rt_sigreturn(p, v, retval)
292 	struct proc *p;
293 	void *v;
294 	register_t *retval;
295 {
296 	/* XXX XAX write me */
297 	return(ENOSYS);
298 }
299 
300 int
301 linux_sys_sigreturn(p, v, retval)
302 	struct proc *p;
303 	void *v;
304 	register_t *retval;
305 {
306 	struct linux_sys_sigreturn_args /* {
307 		syscallarg(struct linux_sigcontext *) scp;
308 	} */ *uap = v;
309 	struct linux_sigcontext *scp, context;
310 	struct trapframe *tf;
311 	sigset_t mask;
312 	ssize_t ss_gap;
313 
314 	/*
315 	 * The trampoline code hands us the context.
316 	 * It is unsafe to keep track of it ourselves, in the event that a
317 	 * program jumps out of a signal handler.
318 	 */
319 	scp = SCARG(uap, scp);
320 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
321 		return (EFAULT);
322 
323 	/* Restore register context. */
324 	tf = p->p_md.md_regs;
325 #ifdef VM86
326 	if (context.sc_eflags & PSL_VM) {
327 		tf->tf_vm86_gs = context.sc_gs;
328 		tf->tf_vm86_fs = context.sc_fs;
329 		tf->tf_vm86_es = context.sc_es;
330 		tf->tf_vm86_ds = context.sc_ds;
331 		set_vflags(p, context.sc_eflags);
332 	} else
333 #endif
334 	{
335 		/*
336 		 * Check for security violations.  If we're returning to
337 		 * protected mode, the CPU will validate the segment registers
338 		 * automatically and generate a trap on violations.  We handle
339 		 * the trap, rather than doing all of the checking here.
340 		 */
341 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
342 		    !USERMODE(context.sc_cs, context.sc_eflags))
343 			return (EINVAL);
344 
345 		tf->tf_gs = context.sc_gs;
346 		tf->tf_fs = context.sc_fs;
347 		tf->tf_es = context.sc_es;
348 		tf->tf_ds = context.sc_ds;
349 		tf->tf_eflags = context.sc_eflags;
350 	}
351 	tf->tf_edi = context.sc_edi;
352 	tf->tf_esi = context.sc_esi;
353 	tf->tf_ebp = context.sc_ebp;
354 	tf->tf_ebx = context.sc_ebx;
355 	tf->tf_edx = context.sc_edx;
356 	tf->tf_ecx = context.sc_ecx;
357 	tf->tf_eax = context.sc_eax;
358 	tf->tf_eip = context.sc_eip;
359 	tf->tf_cs = context.sc_cs;
360 	tf->tf_esp = context.sc_esp_at_signal;
361 	tf->tf_ss = context.sc_ss;
362 
363 	/* Restore signal stack. */
364 	/*
365 	 * Linux really does it this way; it doesn't have space in sigframe
366 	 * to save the onstack flag.
367 	 */
368 	ss_gap = (ssize_t)
369 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
370 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
371 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
372 	else
373 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
374 
375 	/* Restore signal mask. */
376 	linux_old_to_native_sigset(&mask, &context.sc_mask);
377 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
378 
379 	return (EJUSTRETURN);
380 }
381 
382 #ifdef USER_LDT
383 
384 int
385 linux_read_ldt(p, uap, retval)
386 	struct proc *p;
387 	struct linux_sys_modify_ldt_args /* {
388 		syscallarg(int) func;
389 		syscallarg(void *) ptr;
390 		syscallarg(size_t) bytecount;
391 	} */ *uap;
392 	register_t *retval;
393 {
394 	struct i386_get_ldt_args gl;
395 	int error;
396 	caddr_t sg;
397 	char *parms;
398 
399 	DPRINTF(("linux_read_ldt!"));
400 	sg = stackgap_init(p, 0);
401 
402 	gl.start = 0;
403 	gl.desc = SCARG(uap, ptr);
404 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
405 
406 	parms = stackgap_alloc(p, &sg, sizeof(gl));
407 
408 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
409 		return (error);
410 
411 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
412 		return (error);
413 
414 	*retval *= sizeof(union descriptor);
415 	return (0);
416 }
417 
418 struct linux_ldt_info {
419 	u_int entry_number;
420 	u_long base_addr;
421 	u_int limit;
422 	u_int seg_32bit:1;
423 	u_int contents:2;
424 	u_int read_exec_only:1;
425 	u_int limit_in_pages:1;
426 	u_int seg_not_present:1;
427 	u_int useable:1;
428 };
429 
430 int
431 linux_write_ldt(p, uap, retval)
432 	struct proc *p;
433 	struct linux_sys_modify_ldt_args /* {
434 		syscallarg(int) func;
435 		syscallarg(void *) ptr;
436 		syscallarg(size_t) bytecount;
437 	} */ *uap;
438 	register_t *retval;
439 {
440 	struct linux_ldt_info ldt_info;
441 	struct segment_descriptor sd;
442 	struct i386_set_ldt_args sl;
443 	int error;
444 	caddr_t sg;
445 	char *parms;
446 	int oldmode = (int)retval[0];
447 
448 	DPRINTF(("linux_write_ldt %d\n", oldmode));
449 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
450 		return (EINVAL);
451 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
452 		return error;
453 	if (ldt_info.entry_number >= 8192)
454 		return (EINVAL);
455 	if (ldt_info.contents == 3) {
456 		if (oldmode)
457 			return (EINVAL);
458 		if (ldt_info.seg_not_present)
459 			return (EINVAL);
460 	}
461 
462 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
463 	    (oldmode || (ldt_info.contents == 0 &&
464 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
465 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
466 	    ldt_info.useable == 0))) {
467 		/* this means you should zero the ldt */
468 		(void)memset(&sd, 0, sizeof(sd));
469 	} else {
470 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
471 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
472 		sd.sd_lolimit = ldt_info.limit & 0xffff;
473 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
474 		sd.sd_type = 16 | (ldt_info.contents << 2) |
475 		    (!ldt_info.read_exec_only << 1);
476 		sd.sd_dpl = SEL_UPL;
477 		sd.sd_p = !ldt_info.seg_not_present;
478 		sd.sd_def32 = ldt_info.seg_32bit;
479 		sd.sd_gran = ldt_info.limit_in_pages;
480 		if (!oldmode)
481 			sd.sd_xx = ldt_info.useable;
482 		else
483 			sd.sd_xx = 0;
484 	}
485 	sg = stackgap_init(p, 0);
486 	sl.start = ldt_info.entry_number;
487 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
488 	sl.num = 1;
489 
490 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
491 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
492 
493 	parms = stackgap_alloc(p, &sg, sizeof(sl));
494 
495 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
496 		return (error);
497 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
498 		return (error);
499 
500 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
501 		return (error);
502 
503 	*retval = 0;
504 	return (0);
505 }
506 
507 #endif /* USER_LDT */
508 
509 int
510 linux_sys_modify_ldt(p, v, retval)
511 	struct proc *p;
512 	void *v;
513 	register_t *retval;
514 {
515 	struct linux_sys_modify_ldt_args /* {
516 		syscallarg(int) func;
517 		syscallarg(void *) ptr;
518 		syscallarg(size_t) bytecount;
519 	} */ *uap = v;
520 
521 	switch (SCARG(uap, func)) {
522 #ifdef USER_LDT
523 	case 0:
524 		return linux_read_ldt(p, uap, retval);
525 	case 1:
526 		retval[0] = 1;
527 		return linux_write_ldt(p, uap, retval);
528 	case 2:
529 #ifdef notyet
530 		return (linux_read_default_ldt(p, uap, retval);
531 #else
532 		return (ENOSYS);
533 #endif
534 	case 0x11:
535 		retval[0] = 0;
536 		return linux_write_ldt(p, uap, retval);
537 #endif /* USER_LDT */
538 
539 	default:
540 		return (ENOSYS);
541 	}
542 }
543 
544 /*
545  * XXX Pathetic hack to make svgalib work. This will fake the major
546  * device number of an opened VT so that svgalib likes it. grmbl.
547  * Should probably do it 'wrong the right way' and use a mapping
548  * array for all major device numbers, and map linux_mknod too.
549  */
550 dev_t
551 linux_fakedev(dev, raw)
552 	dev_t dev;
553 	int raw;
554 {
555 	if (raw) {
556 #if (NWSDISPLAY > 0)
557 		extern const struct cdevsw wsdisplay_cdevsw;
558 		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
559 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
560 #endif
561 	}
562 
563 	return dev;
564 }
565 
566 #if (NWSDISPLAY > 0)
567 /*
568  * That's not complete, but enough to get an X server running.
569  */
570 #define NR_KEYS 128
571 static const u_short plain_map[NR_KEYS] = {
572 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
573 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
574 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
575 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
576 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
577 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
578 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
579 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
580 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
581 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
582 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
583 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
584 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
585 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
586 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
587 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
588 }, shift_map[NR_KEYS] = {
589 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
590 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
591 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
592 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
593 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
594 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
595 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
596 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
597 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
598 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
599 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
600 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
601 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
602 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
603 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
604 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
605 }, altgr_map[NR_KEYS] = {
606 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
607 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
608 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
609 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
610 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
611 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
612 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
613 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
614 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
615 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
616 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
617 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
618 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
619 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
620 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
621 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
622 }, ctrl_map[NR_KEYS] = {
623 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
624 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
625 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
626 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
627 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
628 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
629 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
630 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
631 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
632 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
633 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
634 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
635 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
636 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
637 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
638 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
639 };
640 
641 const u_short * const linux_keytabs[] = {
642 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
643 };
644 #endif
645 
646 static struct biosdisk_info *
647 fd2biosinfo(p, fp)
648 	struct proc *p;
649 	struct file *fp;
650 {
651 	struct vnode *vp;
652 	const char *blkname;
653 	char diskname[16];
654 	int i;
655 	struct nativedisk_info *nip;
656 	struct disklist *dl = i386_alldisks;
657 
658 	if (fp->f_type != DTYPE_VNODE)
659 		return NULL;
660 	vp = (struct vnode *)fp->f_data;
661 
662 	if (vp->v_type != VBLK)
663 		return NULL;
664 
665 	blkname = devsw_blk2name(major(vp->v_rdev));
666 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
667 	    DISKUNIT(vp->v_rdev));
668 
669 	for (i = 0; i < dl->dl_nnativedisks; i++) {
670 		nip = &dl->dl_nativedisks[i];
671 		if (strcmp(diskname, nip->ni_devname))
672 			continue;
673 		if (nip->ni_nmatches != 0)
674 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
675 	}
676 
677 	return NULL;
678 }
679 
680 
681 /*
682  * We come here in a last attempt to satisfy a Linux ioctl() call
683  */
684 int
685 linux_machdepioctl(p, v, retval)
686 	struct proc *p;
687 	void *v;
688 	register_t *retval;
689 {
690 	struct linux_sys_ioctl_args /* {
691 		syscallarg(int) fd;
692 		syscallarg(u_long) com;
693 		syscallarg(caddr_t) data;
694 	} */ *uap = v;
695 	struct sys_ioctl_args bia;
696 	u_long com;
697 	int error, error1;
698 #if (NWSDISPLAY > 0)
699 	struct vt_mode lvt;
700 	caddr_t bvtp, sg;
701 	struct kbentry kbe;
702 #endif
703 	struct linux_hd_geometry hdg;
704 	struct linux_hd_big_geometry hdg_big;
705 	struct biosdisk_info *bip;
706 	struct filedesc *fdp;
707 	struct file *fp;
708 	int fd;
709 	struct disklabel label, *labp;
710 	struct partinfo partp;
711 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
712 	u_long start, biostotal, realtotal;
713 	u_char heads, sectors;
714 	u_int cylinders;
715 	struct ioctl_pt pt;
716 
717 	fd = SCARG(uap, fd);
718 	SCARG(&bia, fd) = fd;
719 	SCARG(&bia, data) = SCARG(uap, data);
720 	com = SCARG(uap, com);
721 
722 	fdp = p->p_fd;
723 
724 	if ((fp = fd_getfile(fdp, fd)) == NULL)
725 		return (EBADF);
726 
727 	switch (com) {
728 #if (NWSDISPLAY > 0)
729 	case LINUX_KDGKBMODE:
730 		com = KDGKBMODE;
731 		break;
732 	case LINUX_KDSKBMODE:
733 		com = KDSKBMODE;
734 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
735 			SCARG(&bia, data) = (caddr_t)K_RAW;
736 		break;
737 	case LINUX_KIOCSOUND:
738 		SCARG(&bia, data) =
739 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
740 		/* fall through */
741 	case LINUX_KDMKTONE:
742 		com = KDMKTONE;
743 		break;
744 	case LINUX_KDSETMODE:
745 		com = KDSETMODE;
746 		break;
747 	case LINUX_KDGETMODE:
748 		/* KD_* values are equal to the wscons numbers */
749 		com = WSDISPLAYIO_GMODE;
750 		break;
751 	case LINUX_KDENABIO:
752 		com = KDENABIO;
753 		break;
754 	case LINUX_KDDISABIO:
755 		com = KDDISABIO;
756 		break;
757 	case LINUX_KDGETLED:
758 		com = KDGETLED;
759 		break;
760 	case LINUX_KDSETLED:
761 		com = KDSETLED;
762 		break;
763 	case LINUX_VT_OPENQRY:
764 		com = VT_OPENQRY;
765 		break;
766 	case LINUX_VT_GETMODE:
767 		SCARG(&bia, com) = VT_GETMODE;
768 		if ((error = sys_ioctl(p, &bia, retval)))
769 			return error;
770 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
771 		    sizeof (struct vt_mode))))
772 			return error;
773 		lvt.relsig = native_to_linux_signo[lvt.relsig];
774 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
775 		lvt.frsig = native_to_linux_signo[lvt.frsig];
776 		return copyout((caddr_t)&lvt, SCARG(uap, data),
777 		    sizeof (struct vt_mode));
778 	case LINUX_VT_SETMODE:
779 		com = VT_SETMODE;
780 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
781 		    sizeof (struct vt_mode))))
782 			return error;
783 		lvt.relsig = linux_to_native_signo[lvt.relsig];
784 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
785 		lvt.frsig = linux_to_native_signo[lvt.frsig];
786 		sg = stackgap_init(p, 0);
787 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
788 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
789 			return error;
790 		SCARG(&bia, data) = bvtp;
791 		break;
792 	case LINUX_VT_DISALLOCATE:
793 		/* XXX should use WSDISPLAYIO_DELSCREEN */
794 		return 0;
795 	case LINUX_VT_RELDISP:
796 		com = VT_RELDISP;
797 		break;
798 	case LINUX_VT_ACTIVATE:
799 		com = VT_ACTIVATE;
800 		break;
801 	case LINUX_VT_WAITACTIVE:
802 		com = VT_WAITACTIVE;
803 		break;
804 	case LINUX_VT_GETSTATE:
805 		com = VT_GETSTATE;
806 		break;
807 	case LINUX_KDGKBTYPE:
808 		/* This is what Linux does. */
809 		return (subyte(SCARG(uap, data), KB_101));
810 	case LINUX_KDGKBENT:
811 		/*
812 		 * The Linux KDGKBENT ioctl is different from the
813 		 * SYSV original. So we handle it in machdep code.
814 		 * XXX We should use keyboard mapping information
815 		 * from wsdisplay, but this would be expensive.
816 		 */
817 		if ((error = copyin(SCARG(uap, data), &kbe,
818 				    sizeof(struct kbentry))))
819 			return (error);
820 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
821 		    || kbe.kb_index >= NR_KEYS)
822 			return (EINVAL);
823 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
824 		return (copyout(&kbe, SCARG(uap, data),
825 				sizeof(struct kbentry)));
826 #endif
827 	case LINUX_HDIO_GETGEO:
828 	case LINUX_HDIO_GETGEO_BIG:
829 		/*
830 		 * Try to mimic Linux behaviour: return the BIOS geometry
831 		 * if possible (extending its # of cylinders if it's beyond
832 		 * the 1023 limit), fall back to the MI geometry (i.e.
833 		 * the real geometry) if not found, by returning an
834 		 * error. See common/linux_hdio.c
835 		 */
836 		FILE_USE(fp);
837 		bip = fd2biosinfo(p, fp);
838 		ioctlf = fp->f_ops->fo_ioctl;
839 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
840 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
841 		FILE_UNUSE(fp, p);
842 		if (error != 0 && error1 != 0)
843 			return error1;
844 		labp = error != 0 ? &label : partp.disklab;
845 		start = error1 != 0 ? partp.part->p_offset : 0;
846 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
847 		    && bip->bi_cyl != 0) {
848 			heads = bip->bi_head;
849 			sectors = bip->bi_sec;
850 			cylinders = bip->bi_cyl;
851 			biostotal = heads * sectors * cylinders;
852 			realtotal = labp->d_ntracks * labp->d_nsectors *
853 			    labp->d_ncylinders;
854 			if (realtotal > biostotal)
855 				cylinders = realtotal / (heads * sectors);
856 		} else {
857 			heads = labp->d_ntracks;
858 			cylinders = labp->d_ncylinders;
859 			sectors = labp->d_nsectors;
860 		}
861 		if (com == LINUX_HDIO_GETGEO) {
862 			hdg.start = start;
863 			hdg.heads = heads;
864 			hdg.cylinders = cylinders;
865 			hdg.sectors = sectors;
866 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
867 		} else {
868 			hdg_big.start = start;
869 			hdg_big.heads = heads;
870 			hdg_big.cylinders = cylinders;
871 			hdg_big.sectors = sectors;
872 			return copyout(&hdg_big, SCARG(uap, data),
873 			    sizeof hdg_big);
874 		}
875 
876 	default:
877 		/*
878 		 * Unknown to us. If it's on a device, just pass it through
879 		 * using PTIOCLINUX, the device itself might be able to
880 		 * make some sense of it.
881 		 * XXX hack: if the function returns EJUSTRETURN,
882 		 * it has stuffed a sysctl return value in pt.data.
883 		 */
884 		FILE_USE(fp);
885 		ioctlf = fp->f_ops->fo_ioctl;
886 		pt.com = SCARG(uap, com);
887 		pt.data = SCARG(uap, data);
888 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
889 		FILE_UNUSE(fp, p);
890 		if (error == EJUSTRETURN) {
891 			retval[0] = (register_t)pt.data;
892 			error = 0;
893 		}
894 
895 		if (error == ENOTTY)
896 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
897 			    com));
898 		return error;
899 	}
900 	SCARG(&bia, com) = com;
901 	return sys_ioctl(p, &bia, retval);
902 }
903 
904 /*
905  * Set I/O permissions for a process. Just set the maximum level
906  * right away (ignoring the argument), otherwise we would have
907  * to rely on I/O permission maps, which are not implemented.
908  */
909 int
910 linux_sys_iopl(p, v, retval)
911 	struct proc *p;
912 	void *v;
913 	register_t *retval;
914 {
915 #if 0
916 	struct linux_sys_iopl_args /* {
917 		syscallarg(int) level;
918 	} */ *uap = v;
919 #endif
920 	struct trapframe *fp = p->p_md.md_regs;
921 
922 	if (suser(p->p_ucred, &p->p_acflag) != 0)
923 		return EPERM;
924 	fp->tf_eflags |= PSL_IOPL;
925 	*retval = 0;
926 	return 0;
927 }
928 
929 /*
930  * See above. If a root process tries to set access to an I/O port,
931  * just let it have the whole range.
932  */
933 int
934 linux_sys_ioperm(p, v, retval)
935 	struct proc *p;
936 	void *v;
937 	register_t *retval;
938 {
939 	struct linux_sys_ioperm_args /* {
940 		syscallarg(unsigned int) lo;
941 		syscallarg(unsigned int) hi;
942 		syscallarg(int) val;
943 	} */ *uap = v;
944 	struct trapframe *fp = p->p_md.md_regs;
945 
946 	if (suser(p->p_ucred, &p->p_acflag) != 0)
947 		return EPERM;
948 	if (SCARG(uap, val))
949 		fp->tf_eflags |= PSL_IOPL;
950 	*retval = 0;
951 	return 0;
952 }
953