1 /* $NetBSD: syscall.c,v 1.49 2023/10/05 19:41:06 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christos Zoulas.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1996-2002 Eduardo Horvath. All rights reserved.
34 * Copyright (c) 1996
35 * The President and Fellows of Harvard College. All rights reserved.
36 * Copyright (c) 1992, 1993
37 * The Regents of the University of California. All rights reserved.
38 *
39 * This software was developed by the Computer Systems Engineering group
40 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
41 * contributed to Berkeley.
42 *
43 * All advertising materials mentioning features or use of this software
44 * must display the following acknowledgement:
45 * This product includes software developed by the University of
46 * California, Lawrence Berkeley Laboratory.
47 * This product includes software developed by Harvard University.
48 *
49 * Redistribution and use in source and binary forms, with or without
50 * modification, are permitted provided that the following conditions
51 * are met:
52 * 1. Redistributions of source code must retain the above copyright
53 * notice, this list of conditions and the following disclaimer.
54 * 2. Redistributions in binary form must reproduce the above copyright
55 * notice, this list of conditions and the following disclaimer in the
56 * documentation and/or other materials provided with the distribution.
57 * 3. All advertising materials mentioning features or use of this software
58 * must display the following acknowledgement:
59 * This product includes software developed by the University of
60 * California, Berkeley and its contributors.
61 * This product includes software developed by Harvard University.
62 * 4. Neither the name of the University nor the names of its contributors
63 * may be used to endorse or promote products derived from this software
64 * without specific prior written permission.
65 *
66 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
67 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
68 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
69 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
70 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
71 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
72 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
73 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
74 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
75 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
76 * SUCH DAMAGE.
77 *
78 * @(#)trap.c 8.4 (Berkeley) 9/23/93
79 */
80
81 #include <sys/cdefs.h>
82 __KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.49 2023/10/05 19:41:06 ad Exp $");
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/proc.h>
87 #include <sys/signal.h>
88 #include <sys/kmem.h>
89 #include <sys/ktrace.h>
90 #include <sys/syscall.h>
91 #include <sys/syscallvar.h>
92
93 #include <uvm/uvm_extern.h>
94
95 #include <machine/cpu.h>
96 #include <machine/ctlreg.h>
97 #include <machine/trap.h>
98 #include <machine/instr.h>
99 #include <machine/pmap.h>
100 #include <machine/frame.h>
101 #include <machine/userret.h>
102
/*
 * Fallback for offsetof() in case no header included above provides it.
 * Prefer the compiler builtin: the traditional form computes the offset
 * by taking a member address through a null pointer, which is formally
 * undefined behaviour.  The traditional form is kept only for compilers
 * that do not identify themselves as GCC or Clang.
 */
#ifndef offsetof
#if defined(__GNUC__) || defined(__clang__)
#define offsetof(s, f)	__builtin_offsetof(s, f)
#else
#define offsetof(s, f)	((size_t)&((s *)0)->f)
#endif
#endif

/* Number of slots in each of the arrays overlaid in union args. */
#define MAXARGS		8
107
/*
 * Buffer for the arguments of one system call.  The three arrays overlay
 * each other: getargs() fills `i' (32-bit slots) or `l' (64-bit slots)
 * depending on the kind of user stack, and syscall() hands `r' to
 * sy_invoke().  Each array has MAXARGS slots.
 */
108 union args {
109 register32_t i[MAXARGS];
110 register64_t l[MAXARGS];
111 register_t r[MAXARGS];
112 };
113
/*
 * Forward declarations: the two file-local helpers used by syscall(), and
 * syscall() itself, which syscall_intern() installs as the process's
 * md_syscall handler.
 */
114 static inline int handle_old(struct trapframe64 *, register_t *);
115 static inline int getargs(struct proc *, struct trapframe64 *,
116 register_t *, const struct sysent **, union args *, int *);
117 void syscall(struct trapframe64 *, register_t, register_t);
118
119 /*
120 * Handle old style system calls.
121 */
122 static inline int
handle_old(struct trapframe64 * tf,register_t * code)123 handle_old(struct trapframe64 *tf, register_t *code)
124 {
125 int new = *code & (SYSCALL_G7RFLAG|SYSCALL_G2RFLAG|SYSCALL_G5RFLAG);
126 *code &= ~(SYSCALL_G7RFLAG|SYSCALL_G2RFLAG|SYSCALL_G5RFLAG);
127 if (new) {
128 /* note that G5RFLAG is multiple bits! */
129 if (__predict_true((new & SYSCALL_G5RFLAG) == SYSCALL_G5RFLAG))
130 tf->tf_pc = tf->tf_global[5];
131 else if (new & SYSCALL_G7RFLAG)
132 tf->tf_pc = tf->tf_global[7];
133 else
134 tf->tf_pc = tf->tf_global[2];
135 } else {
136 tf->tf_pc = tf->tf_npc;
137 }
138 return new;
139 }
140
141
142 /*
143 * The first six system call arguments are in the six %o registers.
144 * Any arguments beyond that are in the `argument extension' area
145 * of the user's stack frame (see <machine/frame.h>).
146 *
147 * Check for ``special'' codes that alter this, namely syscall and
148 * __syscall. The latter takes a quad syscall number, so that other
149 * arguments are at their natural alignments. Adjust the number
150 * of ``easy'' arguments as appropriate; we will copy the hard
151 * ones later as needed.
152 */
153 static inline int
getargs(struct proc * p,struct trapframe64 * tf,register_t * code,const struct sysent ** callp,union args * args,int * s64)154 getargs(struct proc *p, struct trapframe64 *tf, register_t *code,
155 const struct sysent **callp, union args *args, int *s64)
156 {
157 int64_t *ap = &tf->tf_out[0];
158 int i, error, nap = 6;
159 *s64 = tf->tf_out[6] & 1L; /* Do we have a 64-bit stack? */
160
161 *callp = p->p_emul->e_sysent;
162 switch (*code) {
163 case SYS_syscall:
164 *code = *ap++;
165 nap--;
166 break;
167 case SYS___syscall:
168 if (*s64) {
169 /* longs *are* quadwords */
170 *code = ap[0];
171 ap += 1;
172 nap -= 1;
173 } else {
174 *code = ap[_QUAD_LOWWORD];
175 ap += 2;
176 nap -= 2;
177 }
178 break;
179 }
180
181 if (*code >= p->p_emul->e_nsysent)
182 return ENOSYS;
183
184 *callp += *code;
185
186 if (*s64) {
187 /* 64-bit stack -- not really supported on 32-bit kernels */
188 register64_t *argp;
189 #ifdef DEBUG
190 #ifdef __arch64__
191 if ((p->p_flag & PK_32) != 0) {
192 printf("syscall(): 64-bit stack but P_32 set\n");
193 #ifdef DDB
194 Debugger();
195 #endif
196 }
197 #else
198 printf("syscall(): 64-bit stack on a 32-bit kernel????\n");
199 #ifdef DDB
200 Debugger();
201 #endif
202 #endif
203 #endif
204 i = (*callp)->sy_narg;
205 if (__predict_false(i > nap)) { /* usually false */
206 void *pos = (char *)(u_long)tf->tf_out[6] + BIAS +
207 offsetof(struct frame64, fr_argx);
208 KASSERT(i <= MAXARGS);
209 /* Read the whole block in */
210 error = copyin(pos, &args->l[nap],
211 (i - nap) * sizeof(*argp));
212 if (error)
213 return error;
214 i = nap;
215 }
216 for (argp = args->l; i--;)
217 *argp++ = *ap++;
218 } else {
219 register32_t *argp;
220
221 i = (long)(*callp)->sy_argsize / sizeof(register32_t);
222 if (__predict_false(i > nap)) { /* usually false */
223 void *pos = (char *)(u_long)tf->tf_out[6] +
224 offsetof(struct frame32, fr_argx);
225 KASSERT(i <= MAXARGS);
226 /* Read the whole block in */
227 error = copyin(pos, &args->i[nap],
228 (i - nap) * sizeof(*argp));
229 if (error)
230 return error;
231 i = nap;
232 }
233 /* Need to convert from int64 to int32 or we lose */
234 for (argp = args->i; i--;)
235 *argp++ = *ap++;
236 }
237 return 0;
238 }
239
240 void
syscall_intern(struct proc * p)241 syscall_intern(struct proc *p)
242 {
243
244 p->p_trace_enabled = trace_is_enabled(p);
245 p->p_md.md_syscall = syscall;
246 }
247
248 /*
249 * System calls. `pc' is just a copy of tf->tf_pc.
250 *
251 * Note that the things labelled `out' registers in the trapframe were the
252 * `in' registers within the syscall trap code (because of the automatic
253 * `save' effect of each trap). They are, however, the %o registers of the
254 * thing that made the system call, and are named that way here.
255 *
256 * 32-bit system calls on a 64-bit system are a problem. Each system call
257 * argument is stored in the smaller of the argument's true size or a
258 * `register_t'. Now on a 64-bit machine all normal types can be stored in a
259 * `register_t'. (The only exceptions would be 128-bit `quad's or 128-bit
260 * extended precision floating point values, which we don't support.) For
261 * 32-bit syscalls, 64-bit integers like `off_t's, double precision floating
262 * point values, and several other types cannot fit in a 32-bit `register_t'.
263 * These will require reading in two `register_t' values for one argument.
264 *
265 * In order to calculate the true size of the arguments and therefore whether
266 * any argument needs to be split into two slots, the system call args
267 * structure needs to be built with the appropriately sized register_t.
268 * Otherwise the emul needs to do some magic to split oversized arguments.
269 *
270 * We can handle most of this stuff for normal syscalls by using either a 32-bit
271 * or 64-bit array of `register_t' arguments. Unfortunately ktrace always
272 * expects arguments to be `register_t's, so it loses badly. What's worse,
273 * ktrace may need to do size translations to massage the argument array
274 * appropriately according to the emulation that is doing the ktrace.
275 *
276 */
277 void
syscall(struct trapframe64 * tf,register_t code,register_t pc)278 syscall(struct trapframe64 *tf, register_t code, register_t pc)
279 {
280 const struct sysent *callp;
281 struct lwp *l = curlwp;
282 union args args;
283 struct proc *p = l->l_proc;
284 int error, new;
285 register_t rval[2];
286 u_quad_t sticks;
287 vaddr_t opc, onpc;
288 int s64;
289
290 curcpu()->ci_data.cpu_nsyscall++;
291 sticks = p->p_sticks;
292 l->l_md.md_tf = tf;
293
294 /*
295 * save pc/npc in case of ERESTART
296 * adjust pc/npc to new values
297 */
298 opc = tf->tf_pc;
299 onpc = tf->tf_npc;
300
301 new = handle_old(tf, &code);
302
303 tf->tf_npc = tf->tf_pc + 4;
304
305 if ((error = getargs(p, tf, &code, &callp, &args, &s64)) != 0)
306 goto bad;
307
308 rval[0] = 0;
309 rval[1] = tf->tf_out[1];
310
311 #ifdef DIAGNOSTIC
312 KASSERT(p->p_pid != 0);
313 KASSERTMSG(!(tf->tf_tstate & TSTATE_PRIV),
314 "syscall %ld, pid %d trap frame %p tstate %#" PRIx64
315 " is privileged %s\n", code, p->p_pid, tf, tf->tf_tstate,
316 (tf->tf_tstate & TSTATE_PRIV) ? "yes" : "no");
317 if (p->p_flag & PK_32) {
318 KASSERTMSG(tf->tf_tstate & TSTATE_AM,
319 "32bit syscall %ld, pid %d trap frame %p tstate %#" PRIx64
320 " has AM %s\n", code, p->p_pid, tf, tf->tf_tstate,
321 (tf->tf_tstate & TSTATE_AM) ? "yes" : "no");
322 }
323 #endif
324
325 error = sy_invoke(callp, l, args.r, rval, code);
326
327 switch (error) {
328 case 0:
329 /* Note: fork() does not return here in the child */
330 tf->tf_out[0] = rval[0];
331 tf->tf_out[1] = rval[1];
332 if (!new)
333 /* old system call convention: clear C on success */
334 tf->tf_tstate &= ~(((int64_t)(ICC_C | XCC_C)) <<
335 TSTATE_CCR_SHIFT); /* success */
336 break;
337
338 case ERESTART:
339 tf->tf_pc = opc;
340 tf->tf_npc = onpc;
341 break;
342
343 case EJUSTRETURN:
344 /* nothing to do */
345 break;
346
347 default:
348 bad:
349 if (p->p_emul->e_errno)
350 error = p->p_emul->e_errno[error];
351 tf->tf_out[0] = error;
352 tf->tf_tstate |= (((int64_t)(ICC_C | XCC_C)) <<
353 TSTATE_CCR_SHIFT); /* fail */
354 tf->tf_pc = onpc;
355 tf->tf_npc = tf->tf_pc + 4;
356 break;
357 }
358
359 userret(l, pc, sticks);
360 share_fpu(l, tf);
361 }
362
363 /*
364 * Process the tail end of a fork() for the child.
365 */
366 void
md_child_return(struct lwp * l)367 md_child_return(struct lwp *l)
368 {
369
370 /*
371 * Return values in the frame set by cpu_lwp_fork().
372 */
373 userret(l, l->l_md.md_tf->tf_pc, 0);
374 }
375
376 /*
377 * Process the tail end of a posix_spawn() for the child.
378 */
379 void
cpu_spawn_return(struct lwp * l)380 cpu_spawn_return(struct lwp *l)
381 {
382
383 userret(l, l->l_md.md_tf->tf_pc, 0);
384 }
385
386 /*
387 * Start a new LWP
388 */
389 void
startlwp(void * arg)390 startlwp(void *arg)
391 {
392 ucontext_t *uc = arg;
393 lwp_t *l = curlwp;
394 int error __diagused;
395
396 error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
397 KASSERT(error == 0);
398
399 kmem_free(uc, sizeof(ucontext_t));
400 userret(l, 0, 0);
401 }
402