xref: /netbsd-src/sys/arch/amd64/amd64/linux_syscall.c (revision ce2c90c7c172d95d2402a5b3d96d8f8e6d138a21)
1 /*	$NetBSD: linux_syscall.c,v 1.9 2006/07/19 21:11:39 ad Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Charles M. Hannum.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_syscall.c,v 1.9 2006/07/19 21:11:39 ad Exp $");
41 
42 #include "opt_compat_linux.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/proc.h>
47 #include <sys/user.h>
48 #include <sys/signal.h>
49 #include <sys/sa.h>
50 #include <sys/savar.h>
51 #include <sys/syscall.h>
52 
53 #include <uvm/uvm_extern.h>
54 
55 #include <machine/cpu.h>
56 #include <machine/psl.h>
57 #include <machine/userret.h>
58 
59 #include <compat/linux/linux_syscall.h>
60 #include <compat/linux/common/linux_types.h>
61 #include <compat/linux/common/linux_errno.h>
62 #include <compat/linux/common/linux_signal.h>
63 #include <compat/linux/common/linux_siginfo.h>
64 #include <compat/linux/arch/amd64/linux_siginfo.h>
65 #include <compat/linux/arch/amd64/linux_syscall.h>
66 #include <compat/linux/arch/amd64/linux_machdep.h>
67 #include <compat/linux/common/linux_errno.h>
68 
69 void linux_syscall_intern(struct proc *);
70 static void linux_syscall_plain(struct trapframe *);
71 static void linux_syscall_fancy(struct trapframe *);
72 
73 void
74 linux_syscall_intern(struct proc *p)
75 {
76 
77 	if (trace_is_enabled(p))
78 		p->p_md.md_syscall = linux_syscall_fancy;
79 	else
80 		p->p_md.md_syscall = linux_syscall_plain;
81 }
82 
83 /*
84  * syscall(frame):
85  *	System call request from POSIX system call gate interface to kernel.
86  * Like trap(), argument is call by reference.
87  */
88 static void
89 linux_syscall_plain(struct trapframe *frame)
90 {
91 	caddr_t params;
92 	const struct sysent *callp;
93 	struct proc *p;
94 	struct lwp *l;
95 	int error;
96 	size_t argsize, argoff;
97 	register_t code, args[9], rval[2], *argp;
98 
99 	uvmexp.syscalls++;
100 	l = curlwp;
101 	p = l->l_proc;
102 	LWP_CACHE_CREDS(l, p);
103 
104 	code = frame->tf_rax;
105 	callp = p->p_emul->e_sysent;
106 	argoff = 0;
107 	argp = &args[0];
108 
109 	code &= (LINUX_SYS_NSYSENT - 1);
110 	callp += code;
111 
112 	argsize = (callp->sy_argsize >> 3) + argoff;
113 	if (argsize) {
114 		switch (MIN(argsize, 6)) {
115 		case 6:
116 			args[5] = frame->tf_r9;
117 		case 5:
118 			args[4] = frame->tf_r8;
119 		case 4:
120 			args[3] = frame->tf_r10;
121 		case 3:
122 			args[2] = frame->tf_rdx;
123 		case 2:
124 			args[1] = frame->tf_rsi;
125 		case 1:
126 			args[0] = frame->tf_rdi;
127 			break;
128 		default:
129 			panic("impossible syscall argsize");
130 		}
131 		if (argsize > 6) {
132 			argsize -= 6;
133 			params = (caddr_t)frame->tf_rsp + sizeof(register_t);
134 			error = copyin(params, (caddr_t)&args[6],
135 					argsize << 3);
136 			if (error != 0)
137 				goto bad;
138 		}
139 	}
140 
141 	rval[0] = 0;
142 	rval[1] = 0;
143 	KERNEL_PROC_LOCK(l);
144 	error = (*callp->sy_call)(l, argp, rval);
145 	KERNEL_PROC_UNLOCK(l);
146 
147 	switch (error) {
148 	case 0:
149 		frame->tf_rax = rval[0];
150 		frame->tf_rflags &= ~PSL_C;	/* carry bit */
151 		break;
152 	case ERESTART:
153 		/*
154 		 * The offset to adjust the PC by depends on whether we entered
155 		 * the kernel through the trap or call gate.  We pushed the
156 		 * size of the instruction into tf_err on entry.
157 		 */
158 		frame->tf_rip -= frame->tf_err;
159 		break;
160 	case EJUSTRETURN:
161 		/* nothing to do */
162 		break;
163 	default:
164 	bad:
165 		frame->tf_rax = native_to_linux_errno[error];
166 		frame->tf_rflags |= PSL_C;	/* carry bit */
167 		break;
168 	}
169 
170 	userret(l);
171 }
172 
173 static void
174 linux_syscall_fancy(struct trapframe *frame)
175 {
176 	caddr_t params;
177 	const struct sysent *callp;
178 	struct proc *p;
179 	struct lwp *l;
180 	int error;
181 	size_t argsize, argoff;
182 	register_t code, args[9], rval[2], *argp;
183 
184 	uvmexp.syscalls++;
185 	l = curlwp;
186 	p = l->l_proc;
187 	LWP_CACHE_CREDS(l, p);
188 
189 	code = frame->tf_rax;
190 	callp = p->p_emul->e_sysent;
191 	argp = &args[0];
192 	argoff = 0;
193 
194 	code &= (SYS_NSYSENT - 1);
195 	callp += code;
196 
197 	argsize = (callp->sy_argsize >> 3) + argoff;
198 	if (argsize) {
199 		switch (MIN(argsize, 6)) {
200 		case 6:
201 			args[5] = frame->tf_r9;
202 		case 5:
203 			args[4] = frame->tf_r8;
204 		case 4:
205 			args[3] = frame->tf_r10;
206 		case 3:
207 			args[2] = frame->tf_rdx;
208 		case 2:
209 			args[1] = frame->tf_rsi;
210 		case 1:
211 			args[0] = frame->tf_rdi;
212 			break;
213 		default:
214 			panic("impossible syscall argsize");
215 		}
216 		if (argsize > 6) {
217 			argsize -= 6;
218 			params = (caddr_t)frame->tf_rsp + sizeof(register_t);
219 			error = copyin(params, (caddr_t)&args[6],
220 					argsize << 3);
221 			if (error != 0)
222 				goto bad;
223 		}
224 	}
225 
226 	KERNEL_PROC_LOCK(l);
227 	if ((error = trace_enter(l, code, code, NULL, argp)) != 0)
228 		goto out;
229 
230 	rval[0] = 0;
231 	rval[1] = 0;
232 	error = (*callp->sy_call)(l, argp, rval);
233 out:
234 	KERNEL_PROC_UNLOCK(l);
235 	switch (error) {
236 	case 0:
237 		frame->tf_rax = rval[0];
238 		frame->tf_rflags &= ~PSL_C;	/* carry bit */
239 		break;
240 	case ERESTART:
241 		/*
242 		 * The offset to adjust the PC by depends on whether we entered
243 		 * the kernel through the trap or call gate.  We pushed the
244 		 * size of the instruction into tf_err on entry.
245 		 */
246 		frame->tf_rip -= frame->tf_err;
247 		break;
248 	case EJUSTRETURN:
249 		/* nothing to do */
250 		break;
251 	default:
252 	bad:
253 		frame->tf_rax = native_to_linux_errno[error];
254 		frame->tf_rflags |= PSL_C;	/* carry bit */
255 		break;
256 	}
257 
258 	trace_exit(l, code, argp, rval, error);
259 
260 	userret(l);
261 }
262