1 /* $NetBSD: syscall.c,v 1.22 2023/10/05 19:41:06 ad Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 2000, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Charles M. Hannum.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: syscall.c,v 1.22 2023/10/05 19:41:06 ad Exp $");
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/proc.h>
38 #include <sys/signal.h>
39 #include <sys/ktrace.h>
40 #include <sys/syscall.h>
41 #include <sys/syscallvar.h>
42 #include <sys/syscall_stats.h>
43
44 #include <uvm/uvm_extern.h>
45
46 #include <machine/cpu.h>
47 #include <machine/psl.h>
48 #include <machine/userret.h>
49
50 #include "opt_dtrace.h"
51
/*
 * Forward declarations.
 *
 * x86_copyargs() is only referenced by the non-x86_64 argument-fetch path
 * of syscall() and is not defined in this file.
 */
#ifndef __x86_64__
int	x86_copyargs(void *, void *, size_t);
#endif

static void	syscall(struct trapframe *);
void		syscall_intern(struct proc *);
58
59 void
md_child_return(struct lwp * l)60 md_child_return(struct lwp *l)
61 {
62 struct trapframe *tf = l->l_md.md_regs;
63
64 X86_TF_RAX(tf) = 0;
65 X86_TF_RFLAGS(tf) &= ~PSL_C;
66
67 userret(l);
68 }
69
/*
 * cpu_spawn_return(l):
 *	Finish a posix_spawn() on behalf of the child.  The trapframe is
 *	left untouched here; only the return-to-user processing is run.
 */
void
cpu_spawn_return(struct lwp *l)
{
	userret(l);
}
79
80 /*
81 * syscall(frame):
82 * System call request from POSIX system call gate interface to kernel.
83 * Like trap(), argument is call by reference.
84 */
85 #ifdef KDTRACE_HOOKS
86 void syscall(struct trapframe *);
87 #else
88 static
89 #endif
90 void
syscall(struct trapframe * frame)91 syscall(struct trapframe *frame)
92 {
93 const struct sysent *callp;
94 struct proc *p;
95 struct lwp *l;
96 int error;
97 register_t code, rval[2];
98 #ifdef __x86_64__
99 /* Verify that the syscall args will fit in the trapframe space */
100 CTASSERT(offsetof(struct trapframe, tf_arg9) >=
101 sizeof(register_t) * (2 + SYS_MAXSYSARGS - 1));
102 #define args (&frame->tf_rdi)
103 #else
104 register_t args[2 + SYS_MAXSYSARGS];
105 #endif
106
107 l = curlwp;
108 p = l->l_proc;
109
110 code = X86_TF_RAX(frame) & (SYS_NSYSENT - 1);
111 callp = p->p_emul->e_sysent + code;
112
113 SYSCALL_COUNT(syscall_counts, code);
114 SYSCALL_TIME_SYS_ENTRY(l, syscall_times, code);
115
116 #ifdef __x86_64__
117 /*
118 * The first 6 syscall args are passed in rdi, rsi, rdx, r10, r8 and r9
119 * (rcx gets copied to r10 in the libc stub because the syscall
120 * instruction overwrites %cx) and are together in the trap frame
121 * with space following for 4 more entries.
122 */
123 if (__predict_false(callp->sy_argsize > 6 * 8)) {
124 error = copyin((register_t *)frame->tf_rsp + 1,
125 &frame->tf_arg6, callp->sy_argsize - 6 * 8);
126 if (error != 0)
127 goto bad;
128 }
129 #else
130 if (callp->sy_argsize) {
131 error = x86_copyargs((char *)frame->tf_esp + sizeof(int), args,
132 callp->sy_argsize);
133 if (__predict_false(error != 0))
134 goto bad;
135 }
136 #endif
137 error = sy_invoke(callp, l, args, rval, code);
138
139 if (__predict_true(error == 0)) {
140 X86_TF_RAX(frame) = rval[0];
141 X86_TF_RDX(frame) = rval[1];
142 X86_TF_RFLAGS(frame) &= ~PSL_C; /* carry bit */
143 } else {
144 switch (error) {
145 case ERESTART:
146 /*
147 * The offset to adjust the PC by depends on whether we
148 * entered the kernel through the trap or call gate.
149 * We saved the instruction size in tf_err on entry.
150 */
151 X86_TF_RIP(frame) -= frame->tf_err;
152 break;
153 case EJUSTRETURN:
154 /* nothing to do */
155 break;
156 default:
157 bad:
158 X86_TF_RAX(frame) = error;
159 X86_TF_RFLAGS(frame) |= PSL_C; /* carry bit */
160 break;
161 }
162 }
163
164 SYSCALL_TIME_SYS_EXIT(l);
165 userret(l);
166 }
167
168 void
syscall_intern(struct proc * p)169 syscall_intern(struct proc *p)
170 {
171
172 p->p_md.md_syscall = syscall;
173 }
174