xref: /netbsd-src/sys/arch/amd64/amd64/linux_syscall.c (revision e5548b402ae4c44fb816de42c7bba9581ce23ef5)
1 /*	$NetBSD: linux_syscall.c,v 1.4 2005/12/07 19:02:36 rjs Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Charles M. Hannum.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_syscall.c,v 1.4 2005/12/07 19:02:36 rjs Exp $");
41 
42 #include "opt_syscall_debug.h"
43 #include "opt_ktrace.h"
44 #include "opt_systrace.h"
45 #include "opt_compat_linux.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/proc.h>
50 #include <sys/user.h>
51 #include <sys/signal.h>
52 #include <sys/sa.h>
53 #include <sys/savar.h>
54 #ifdef KTRACE
55 #include <sys/ktrace.h>
56 #endif
57 #ifdef SYSTRACE
58 #include <sys/systrace.h>
59 #endif
60 #include <sys/syscall.h>
61 
62 #include <uvm/uvm_extern.h>
63 
64 #include <machine/cpu.h>
65 #include <machine/psl.h>
66 #include <machine/userret.h>
67 
68 #include <compat/linux/linux_syscall.h>
69 #include <compat/linux/common/linux_types.h>
70 #include <compat/linux/common/linux_errno.h>
71 #include <compat/linux/common/linux_signal.h>
72 #include <compat/linux/common/linux_siginfo.h>
73 #include <compat/linux/arch/amd64/linux_siginfo.h>
74 #include <compat/linux/arch/amd64/linux_syscall.h>
75 #include <compat/linux/arch/amd64/linux_machdep.h>
76 #include <compat/linux/common/linux_errno.h>
77 
/*
 * Entry points defined in this file.  linux_syscall_intern() installs one
 * of the two static handlers as the per-process syscall routine; the
 * tracing-aware handler is only built with KTRACE or SYSTRACE.
 */
#if defined(KTRACE) || defined(SYSTRACE)
static void linux_syscall_fancy(struct trapframe *frame);
#endif
static void linux_syscall_plain(struct trapframe *frame);
void linux_syscall_intern(struct proc *p);
83 
84 void
85 linux_syscall_intern(struct proc *p)
86 {
87 #ifdef KTRACE
88 	if (p->p_traceflag & (KTRFAC_SYSCALL | KTRFAC_SYSRET)) {
89 		p->p_md.md_syscall = linux_syscall_fancy;
90 		return;
91 	}
92 #endif
93 #ifdef SYSTRACE
94 	if (ISSET(p->p_flag, P_SYSTRACE)) {
95 		p->p_md.md_syscall = linux_syscall_fancy;
96 		return;
97 	}
98 #endif
99 	p->p_md.md_syscall = linux_syscall_plain;
100 }
101 
102 /*
103  * syscall(frame):
104  *	System call request from POSIX system call gate interface to kernel.
105  * Like trap(), argument is call by reference.
106  */
107 static void
108 linux_syscall_plain(struct trapframe *frame)
109 {
110 	caddr_t params;
111 	const struct sysent *callp;
112 	struct proc *p;
113 	struct lwp *l;
114 	int error;
115 	size_t argsize, argoff;
116 	register_t code, args[9], rval[2], *argp;
117 
118 	uvmexp.syscalls++;
119 	l = curlwp;
120 	p = l->l_proc;
121 
122 	code = frame->tf_rax;
123 	callp = p->p_emul->e_sysent;
124 	argoff = 0;
125 	argp = &args[0];
126 
127 	code &= (LINUX_SYS_NSYSENT - 1);
128 	callp += code;
129 
130 	argsize = (callp->sy_argsize >> 3) + argoff;
131 	if (argsize) {
132 		switch (MIN(argsize, 6)) {
133 		case 6:
134 			args[5] = frame->tf_r9;
135 		case 5:
136 			args[4] = frame->tf_r8;
137 		case 4:
138 			args[3] = frame->tf_r10;
139 		case 3:
140 			args[2] = frame->tf_rdx;
141 		case 2:
142 			args[1] = frame->tf_rsi;
143 		case 1:
144 			args[0] = frame->tf_rdi;
145 			break;
146 		default:
147 			panic("impossible syscall argsize");
148 		}
149 		if (argsize > 6) {
150 			argsize -= 6;
151 			params = (caddr_t)frame->tf_rsp + sizeof(register_t);
152 			error = copyin(params, (caddr_t)&args[6],
153 					argsize << 3);
154 			if (error != 0)
155 				goto bad;
156 		}
157 	}
158 
159 #ifdef SYSCALL_DEBUG
160 	scdebug_call(l, code, argp);
161 #endif /* SYSCALL_DEBUG */
162 
163 	rval[0] = 0;
164 	rval[1] = 0;
165 	KERNEL_PROC_LOCK(l);
166 	error = (*callp->sy_call)(l, argp, rval);
167 	KERNEL_PROC_UNLOCK(l);
168 
169 	switch (error) {
170 	case 0:
171 		frame->tf_rax = rval[0];
172 		frame->tf_rflags &= ~PSL_C;	/* carry bit */
173 		break;
174 	case ERESTART:
175 		/*
176 		 * The offset to adjust the PC by depends on whether we entered
177 		 * the kernel through the trap or call gate.  We pushed the
178 		 * size of the instruction into tf_err on entry.
179 		 */
180 		frame->tf_rip -= frame->tf_err;
181 		break;
182 	case EJUSTRETURN:
183 		/* nothing to do */
184 		break;
185 	default:
186 	bad:
187 		frame->tf_rax = native_to_linux_errno[error];
188 		frame->tf_rflags |= PSL_C;	/* carry bit */
189 		break;
190 	}
191 
192 #ifdef SYSCALL_DEBUG
193 	scdebug_ret(l, code, error, rval);
194 #endif /* SYSCALL_DEBUG */
195 	userret(l);
196 }
197 
#if defined(KTRACE) || defined(SYSTRACE)
/*
 * linux_syscall_fancy(frame):
 *	System call request from a Linux-emulated process that is being
 *	traced.  Performs the same argument fetch and result handling as
 *	linux_syscall_plain(), but brackets the call with trace_enter()
 *	and trace_exit().  A non-zero return from trace_enter() is treated
 *	as the result of the system call and the handler is not invoked.
 *
 *	The syscall number is read from %rax.  Up to six arguments are
 *	taken from %rdi, %rsi, %rdx, %r10, %r8 and %r9; any remaining ones
 *	are copied in from the user stack just above the word at %rsp.
 */
static void
linux_syscall_fancy(struct trapframe *frame)
{
	caddr_t params;
	const struct sysent *callp;
	struct proc *p;
	struct lwp *l;
	int error;
	size_t argsize;
	register_t code, args[9], rval[2], *argp;

	uvmexp.syscalls++;
	l = curlwp;
	p = l->l_proc;

	/*
	 * Clear the return values before anything can fail: both the
	 * copyin() and trace_enter() error paths skip the handler, yet
	 * trace_exit() is always handed rval.
	 */
	rval[0] = 0;
	rval[1] = 0;

	code = frame->tf_rax;
	callp = p->p_emul->e_sysent;
	argp = &args[0];

	/*
	 * Fold the number into the Linux table range.  This must use the
	 * Linux table size, as linux_syscall_plain() does, not the native
	 * SYS_NSYSENT: callp points into the emulation's sysent table.
	 * NOTE(review): the mask assumes LINUX_SYS_NSYSENT is a power of
	 * two -- confirm against its definition.
	 */
	code &= (LINUX_SYS_NSYSENT - 1);
	callp += code;

	/* Argument count; the copyin() length below is argsize << 3 bytes. */
	argsize = callp->sy_argsize >> 3;
	if (argsize) {
		switch (MIN(argsize, 6)) {
		case 6:
			args[5] = frame->tf_r9;
			/* FALLTHROUGH */
		case 5:
			args[4] = frame->tf_r8;
			/* FALLTHROUGH */
		case 4:
			args[3] = frame->tf_r10;
			/* FALLTHROUGH */
		case 3:
			args[2] = frame->tf_rdx;
			/* FALLTHROUGH */
		case 2:
			args[1] = frame->tf_rsi;
			/* FALLTHROUGH */
		case 1:
			args[0] = frame->tf_rdi;
			break;
		default:
			panic("impossible syscall argsize");
		}
		if (argsize > 6) {
			/* Arguments beyond the sixth live on the user stack. */
			argsize -= 6;
			params = (caddr_t)frame->tf_rsp + sizeof(register_t);
			error = copyin(params, (caddr_t)&args[6],
					argsize << 3);
			if (error != 0)
				goto bad;
		}
	}

	KERNEL_PROC_LOCK(l);
	if ((error = trace_enter(l, code, code, NULL, argp)) != 0)
		goto out;

	error = (*callp->sy_call)(l, argp, rval);
out:
	KERNEL_PROC_UNLOCK(l);
	switch (error) {
	case 0:
		frame->tf_rax = rval[0];
		frame->tf_rflags &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * The offset to adjust the PC by depends on whether we entered
		 * the kernel through the trap or call gate.  We pushed the
		 * size of the instruction into tf_err on entry.
		 */
		frame->tf_rip -= frame->tf_err;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		/*
		 * Also reached directly when fetching stack arguments fails;
		 * in that case the kernel lock was never taken.
		 */
		frame->tf_rax = native_to_linux_errno[error];
		frame->tf_rflags |= PSL_C;	/* carry bit */
		break;
	}

	trace_exit(l, code, argp, rval, error);

	userret(l);
}
#endif
288