1 /* $NetBSD: linux_syscall.c,v 1.4 2005/12/07 19:02:36 rjs Exp $ */ 2 3 /*- 4 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Charles M. Hannum. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 #include <sys/cdefs.h> 40 __KERNEL_RCSID(0, "$NetBSD: linux_syscall.c,v 1.4 2005/12/07 19:02:36 rjs Exp $"); 41 42 #include "opt_syscall_debug.h" 43 #include "opt_ktrace.h" 44 #include "opt_systrace.h" 45 #include "opt_compat_linux.h" 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/proc.h> 50 #include <sys/user.h> 51 #include <sys/signal.h> 52 #include <sys/sa.h> 53 #include <sys/savar.h> 54 #ifdef KTRACE 55 #include <sys/ktrace.h> 56 #endif 57 #ifdef SYSTRACE 58 #include <sys/systrace.h> 59 #endif 60 #include <sys/syscall.h> 61 62 #include <uvm/uvm_extern.h> 63 64 #include <machine/cpu.h> 65 #include <machine/psl.h> 66 #include <machine/userret.h> 67 68 #include <compat/linux/linux_syscall.h> 69 #include <compat/linux/common/linux_types.h> 70 #include <compat/linux/common/linux_errno.h> 71 #include <compat/linux/common/linux_signal.h> 72 #include <compat/linux/common/linux_siginfo.h> 73 #include <compat/linux/arch/amd64/linux_siginfo.h> 74 #include <compat/linux/arch/amd64/linux_syscall.h> 75 #include <compat/linux/arch/amd64/linux_machdep.h> 76 #include <compat/linux/common/linux_errno.h> 77 78 void linux_syscall_intern(struct proc *); 79 static void linux_syscall_plain(struct trapframe *); 80 #if defined(KTRACE) || defined(SYSTRACE) 81 static void linux_syscall_fancy(struct trapframe *); 82 #endif 83 84 void 85 linux_syscall_intern(struct proc *p) 86 { 87 #ifdef KTRACE 88 if (p->p_traceflag & (KTRFAC_SYSCALL | KTRFAC_SYSRET)) { 89 p->p_md.md_syscall = linux_syscall_fancy; 90 return; 91 } 92 #endif 93 #ifdef SYSTRACE 94 if (ISSET(p->p_flag, P_SYSTRACE)) { 95 p->p_md.md_syscall = linux_syscall_fancy; 96 return; 97 } 98 #endif 99 p->p_md.md_syscall = linux_syscall_plain; 100 } 101 102 /* 103 * syscall(frame): 104 * System call request from POSIX system call gate interface to kernel. 105 * Like trap(), argument is call by reference. 106 */ 107 static void 108 linux_syscall_plain(struct trapframe *frame) 109 { 110 caddr_t params; 111 const struct sysent *callp; 112 struct proc *p; 113 struct lwp *l; 114 int error; 115 size_t argsize, argoff; 116 register_t code, args[9], rval[2], *argp; 117 118 uvmexp.syscalls++; 119 l = curlwp; 120 p = l->l_proc; 121 122 code = frame->tf_rax; 123 callp = p->p_emul->e_sysent; 124 argoff = 0; 125 argp = &args[0]; 126 127 code &= (LINUX_SYS_NSYSENT - 1); 128 callp += code; 129 130 argsize = (callp->sy_argsize >> 3) + argoff; 131 if (argsize) { 132 switch (MIN(argsize, 6)) { 133 case 6: 134 args[5] = frame->tf_r9; 135 case 5: 136 args[4] = frame->tf_r8; 137 case 4: 138 args[3] = frame->tf_r10; 139 case 3: 140 args[2] = frame->tf_rdx; 141 case 2: 142 args[1] = frame->tf_rsi; 143 case 1: 144 args[0] = frame->tf_rdi; 145 break; 146 default: 147 panic("impossible syscall argsize"); 148 } 149 if (argsize > 6) { 150 argsize -= 6; 151 params = (caddr_t)frame->tf_rsp + sizeof(register_t); 152 error = copyin(params, (caddr_t)&args[6], 153 argsize << 3); 154 if (error != 0) 155 goto bad; 156 } 157 } 158 159 #ifdef SYSCALL_DEBUG 160 scdebug_call(l, code, argp); 161 #endif /* SYSCALL_DEBUG */ 162 163 rval[0] = 0; 164 rval[1] = 0; 165 KERNEL_PROC_LOCK(l); 166 error = (*callp->sy_call)(l, argp, rval); 167 KERNEL_PROC_UNLOCK(l); 168 169 switch (error) { 170 case 0: 171 frame->tf_rax = rval[0]; 172 frame->tf_rflags &= ~PSL_C; /* carry bit */ 173 break; 174 case ERESTART: 175 /* 176 * The offset to adjust the PC by depends on whether we entered 177 * the kernel through the trap or call gate. We pushed the 178 * size of the instruction into tf_err on entry. 179 */ 180 frame->tf_rip -= frame->tf_err; 181 break; 182 case EJUSTRETURN: 183 /* nothing to do */ 184 break; 185 default: 186 bad: 187 frame->tf_rax = native_to_linux_errno[error]; 188 frame->tf_rflags |= PSL_C; /* carry bit */ 189 break; 190 } 191 192 #ifdef SYSCALL_DEBUG 193 scdebug_ret(l, code, error, rval); 194 #endif /* SYSCALL_DEBUG */ 195 userret(l); 196 } 197 198 #if defined(KTRACE) || defined(SYSTRACE) 199 static void 200 linux_syscall_fancy(struct trapframe *frame) 201 { 202 caddr_t params; 203 const struct sysent *callp; 204 struct proc *p; 205 struct lwp *l; 206 int error; 207 size_t argsize, argoff; 208 register_t code, args[9], rval[2], *argp; 209 210 uvmexp.syscalls++; 211 l = curlwp; 212 p = l->l_proc; 213 214 code = frame->tf_rax; 215 callp = p->p_emul->e_sysent; 216 argp = &args[0]; 217 argoff = 0; 218 219 code &= (SYS_NSYSENT - 1); 220 callp += code; 221 222 argsize = (callp->sy_argsize >> 3) + argoff; 223 if (argsize) { 224 switch (MIN(argsize, 6)) { 225 case 6: 226 args[5] = frame->tf_r9; 227 case 5: 228 args[4] = frame->tf_r8; 229 case 4: 230 args[3] = frame->tf_r10; 231 case 3: 232 args[2] = frame->tf_rdx; 233 case 2: 234 args[1] = frame->tf_rsi; 235 case 1: 236 args[0] = frame->tf_rdi; 237 break; 238 default: 239 panic("impossible syscall argsize"); 240 } 241 if (argsize > 6) { 242 argsize -= 6; 243 params = (caddr_t)frame->tf_rsp + sizeof(register_t); 244 error = copyin(params, (caddr_t)&args[6], 245 argsize << 3); 246 if (error != 0) 247 goto bad; 248 } 249 } 250 251 KERNEL_PROC_LOCK(l); 252 if ((error = trace_enter(l, code, code, NULL, argp)) != 0) 253 goto out; 254 255 rval[0] = 0; 256 rval[1] = 0; 257 error = (*callp->sy_call)(l, argp, rval); 258 out: 259 KERNEL_PROC_UNLOCK(l); 260 switch (error) { 261 case 0: 262 frame->tf_rax = rval[0]; 263 frame->tf_rflags &= ~PSL_C; /* carry bit */ 264 break; 265 case ERESTART: 266 /* 267 * The offset to adjust the PC by depends on whether we entered 268 * the kernel through the trap or call gate. We pushed the 269 * size of the instruction into tf_err on entry. 270 */ 271 frame->tf_rip -= frame->tf_err; 272 break; 273 case EJUSTRETURN: 274 /* nothing to do */ 275 break; 276 default: 277 bad: 278 frame->tf_rax = native_to_linux_errno[error]; 279 frame->tf_rflags |= PSL_C; /* carry bit */ 280 break; 281 } 282 283 trace_exit(l, code, argp, rval, error); 284 285 userret(l); 286 } 287 #endif 288