126ccf4f1SKonstantin Belousov /*- 2df57947fSPedro F. Giffuni * SPDX-License-Identifier: BSD-4-Clause 3df57947fSPedro F. Giffuni * 426ccf4f1SKonstantin Belousov * Copyright (C) 1994, David Greenman 526ccf4f1SKonstantin Belousov * Copyright (c) 1990, 1993 626ccf4f1SKonstantin Belousov * The Regents of the University of California. All rights reserved. 726ccf4f1SKonstantin Belousov * Copyright (C) 2010 Konstantin Belousov <kib@freebsd.org> 826ccf4f1SKonstantin Belousov * 926ccf4f1SKonstantin Belousov * This code is derived from software contributed to Berkeley by 1026ccf4f1SKonstantin Belousov * the University of Utah, and William Jolitz. 1126ccf4f1SKonstantin Belousov * 1226ccf4f1SKonstantin Belousov * Redistribution and use in source and binary forms, with or without 1326ccf4f1SKonstantin Belousov * modification, are permitted provided that the following conditions 1426ccf4f1SKonstantin Belousov * are met: 1526ccf4f1SKonstantin Belousov * 1. Redistributions of source code must retain the above copyright 1626ccf4f1SKonstantin Belousov * notice, this list of conditions and the following disclaimer. 1726ccf4f1SKonstantin Belousov * 2. Redistributions in binary form must reproduce the above copyright 1826ccf4f1SKonstantin Belousov * notice, this list of conditions and the following disclaimer in the 1926ccf4f1SKonstantin Belousov * documentation and/or other materials provided with the distribution. 2026ccf4f1SKonstantin Belousov * 3. All advertising materials mentioning features or use of this software 2126ccf4f1SKonstantin Belousov * must display the following acknowledgement: 2226ccf4f1SKonstantin Belousov * This product includes software developed by the University of 2326ccf4f1SKonstantin Belousov * California, Berkeley and its contributors. 2426ccf4f1SKonstantin Belousov * 4. Neither the name of the University nor the names of its contributors 2526ccf4f1SKonstantin Belousov * may be used to endorse or promote products derived from this software 2626ccf4f1SKonstantin Belousov * without specific prior written permission. 2726ccf4f1SKonstantin Belousov * 2826ccf4f1SKonstantin Belousov * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2926ccf4f1SKonstantin Belousov * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 3026ccf4f1SKonstantin Belousov * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3126ccf4f1SKonstantin Belousov * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 3226ccf4f1SKonstantin Belousov * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3326ccf4f1SKonstantin Belousov * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3426ccf4f1SKonstantin Belousov * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3526ccf4f1SKonstantin Belousov * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3626ccf4f1SKonstantin Belousov * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3726ccf4f1SKonstantin Belousov * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3826ccf4f1SKonstantin Belousov * SUCH DAMAGE. 3926ccf4f1SKonstantin Belousov */ 4026ccf4f1SKonstantin Belousov 4126ccf4f1SKonstantin Belousov #include "opt_capsicum.h" 4226ccf4f1SKonstantin Belousov #include "opt_ktrace.h" 434a144410SRobert Watson #include <sys/capsicum.h> 4426ccf4f1SKonstantin Belousov #include <sys/ktr.h> 45fef09913SGleb Smirnoff #include <sys/vmmeter.h> 4626ccf4f1SKonstantin Belousov #ifdef KTRACE 4726ccf4f1SKonstantin Belousov #include <sys/uio.h> 4826ccf4f1SKonstantin Belousov #include <sys/ktrace.h> 4926ccf4f1SKonstantin Belousov #endif 5026ccf4f1SKonstantin Belousov #include <security/audit/audit.h> 5126ccf4f1SKonstantin Belousov 52c18ca749SJohn Baldwin static inline void 532d88da2fSKonstantin Belousov syscallenter(struct thread *td) 5426ccf4f1SKonstantin Belousov { 5526ccf4f1SKonstantin Belousov struct proc *p; 562d88da2fSKonstantin Belousov struct syscall_args *sa; 57bdc0cb4eSEdward Tomasz Napierala struct sysent *se; 5826ccf4f1SKonstantin Belousov int error, traced; 59a1bd83feSEdward Tomasz Napierala bool sy_thr_static; 6026ccf4f1SKonstantin Belousov 6183c9dea1SGleb Smirnoff VM_CNT_INC(v_syscall); 6226ccf4f1SKonstantin Belousov p = td->td_proc; 632d88da2fSKonstantin Belousov sa = &td->td_sa; 6426ccf4f1SKonstantin Belousov 6526ccf4f1SKonstantin Belousov td->td_pticks = 0; 66b53133a7SMateusz Guzik if (__predict_false(td->td_cowgen != atomic_load_int(&p->p_cowgen))) 674ea6a9a2SMateusz Guzik thread_cow_update(td); 68bdd64116SJohn Baldwin traced = (p->p_flag & P_TRACED) != 0; 690e84a878SMateusz Guzik if (__predict_false(traced || td->td_dbgflags & TDB_USERWR)) { 7026ccf4f1SKonstantin Belousov PROC_LOCK(p); 71f0592b3cSKonstantin Belousov MPASS((td->td_dbgflags & TDB_BOUNDARY) == 0); 7226ccf4f1SKonstantin Belousov td->td_dbgflags &= ~TDB_USERWR; 73bdd64116SJohn Baldwin if (traced) 7426ccf4f1SKonstantin Belousov td->td_dbgflags |= TDB_SCE; 7526ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 76bdd64116SJohn Baldwin } 772d88da2fSKonstantin Belousov error = (p->p_sysent->sv_fetch_syscall_args)(td); 78bdc0cb4eSEdward Tomasz Napierala se = sa->callp; 7926ccf4f1SKonstantin Belousov #ifdef KTRACE 8026ccf4f1SKonstantin Belousov if (KTRPOINT(td, KTR_SYSCALL)) 81bdc0cb4eSEdward Tomasz Napierala ktrsyscall(sa->code, se->sy_narg, sa->args); 8226ccf4f1SKonstantin Belousov #endif 834c44811cSJeff Roberson KTR_START4(KTR_SYSC, "syscall", syscallname(p, sa->code), 847fc3ae51SOleksandr Tymoshenko (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "arg0:%p", sa->args[0], 854c44811cSJeff Roberson "arg1:%p", sa->args[1], "arg2:%p", sa->args[2]); 8626ccf4f1SKonstantin Belousov 870e84a878SMateusz Guzik if (__predict_false(error != 0)) { 881af9474bSJohn Baldwin td->td_errno = error; 89c26541e3SJohn Baldwin goto retval; 901af9474bSJohn Baldwin } 914c44811cSJeff Roberson 9234098649SEdward Tomasz Napierala if (__predict_false(traced)) { 93343b391fSKonstantin Belousov PROC_LOCK(p); 948d570f64SJohn Baldwin if (p->p_ptevents & PTRACE_SCE) 9582a4538fSEric Badger ptracestop((td), SIGTRAP, NULL); 96343b391fSKonstantin Belousov PROC_UNLOCK(p); 97da45ea6bSEdward Tomasz Napierala 98da45ea6bSEdward Tomasz Napierala if ((td->td_dbgflags & TDB_USERWR) != 0) { 9926ccf4f1SKonstantin Belousov /* 100c26541e3SJohn Baldwin * Reread syscall number and arguments if debugger 101c26541e3SJohn Baldwin * modified registers or memory. 10226ccf4f1SKonstantin Belousov */ 1032d88da2fSKonstantin Belousov error = (p->p_sysent->sv_fetch_syscall_args)(td); 104bdc0cb4eSEdward Tomasz Napierala se = sa->callp; 10526ccf4f1SKonstantin Belousov #ifdef KTRACE 10626ccf4f1SKonstantin Belousov if (KTRPOINT(td, KTR_SYSCALL)) 107bdc0cb4eSEdward Tomasz Napierala ktrsyscall(sa->code, se->sy_narg, sa->args); 10826ccf4f1SKonstantin Belousov #endif 1091af9474bSJohn Baldwin if (error != 0) { 1101af9474bSJohn Baldwin td->td_errno = error; 11126ccf4f1SKonstantin Belousov goto retval; 11226ccf4f1SKonstantin Belousov } 1131af9474bSJohn Baldwin } 114da45ea6bSEdward Tomasz Napierala } 11526ccf4f1SKonstantin Belousov 11626ccf4f1SKonstantin Belousov #ifdef CAPABILITY_MODE 11726ccf4f1SKonstantin Belousov /* 11826ccf4f1SKonstantin Belousov * In capability mode, we only allow access to system calls 11926ccf4f1SKonstantin Belousov * flagged with SYF_CAPENABLED. 12026ccf4f1SKonstantin Belousov */ 12105296a0fSJake Freeland if ((se->sy_flags & SYF_CAPENABLED) == 0) { 12205296a0fSJake Freeland if (CAP_TRACING(td)) 12305296a0fSJake Freeland ktrcapfail(CAPFAIL_SYSCALL, NULL); 12405296a0fSJake Freeland if (IN_CAPABILITY_MODE(td)) { 1251af9474bSJohn Baldwin td->td_errno = error = ECAPMODE; 12626ccf4f1SKonstantin Belousov goto retval; 12726ccf4f1SKonstantin Belousov } 12805296a0fSJake Freeland } 12926ccf4f1SKonstantin Belousov #endif 13026ccf4f1SKonstantin Belousov 131146fc63fSKonstantin Belousov /* 132a113b17fSKonstantin Belousov * Fetch fast sigblock value at the time of syscall entry to 133a113b17fSKonstantin Belousov * handle sleepqueue primitives which might call cursig(). 134146fc63fSKonstantin Belousov */ 135a113b17fSKonstantin Belousov if (__predict_false(sigfastblock_fetch_always)) 136a113b17fSKonstantin Belousov (void)sigfastblock_fetch(td); 137146fc63fSKonstantin Belousov 1382f729243SMateusz Guzik /* Let system calls set td_errno directly. */ 1394c6f466cSEdward Tomasz Napierala KASSERT((td->td_pflags & TDP_NERRNO) == 0, 1404c6f466cSEdward Tomasz Napierala ("%s: TDP_NERRNO set", __func__)); 14126ccf4f1SKonstantin Belousov 142a1bd83feSEdward Tomasz Napierala sy_thr_static = (se->sy_thrcnt & SY_THR_STATIC) != 0; 143a1bd83feSEdward Tomasz Napierala 144*f78fe930SMark Johnston if (__predict_false(AUDIT_SYSCALL_ENABLED() || 145*f78fe930SMark Johnston SYSTRACE_ENABLED() || !sy_thr_static)) { 146a1bd83feSEdward Tomasz Napierala if (!sy_thr_static) { 14739024a89SKonstantin Belousov error = syscall_thread_enter(td, &se); 14839024a89SKonstantin Belousov sy_thr_static = (se->sy_thrcnt & SY_THR_STATIC) != 0; 149a1bd83feSEdward Tomasz Napierala if (error != 0) { 150a1bd83feSEdward Tomasz Napierala td->td_errno = error; 151a1bd83feSEdward Tomasz Napierala goto retval; 152a1bd83feSEdward Tomasz Napierala } 153a1bd83feSEdward Tomasz Napierala } 154a1bd83feSEdward Tomasz Napierala 1552f729243SMateusz Guzik #ifdef KDTRACE_HOOKS 1562f729243SMateusz Guzik /* Give the syscall:::entry DTrace probe a chance to fire. */ 157bdc0cb4eSEdward Tomasz Napierala if (__predict_false(se->sy_entry != 0)) 1582f729243SMateusz Guzik (*systrace_probe_func)(sa, SYSTRACE_ENTRY, 0); 1592f729243SMateusz Guzik #endif 160*f78fe930SMark Johnston 161*f78fe930SMark Johnston AUDIT_SYSCALL_ENTER(sa->code, td); 162*f78fe930SMark Johnston 163bdc0cb4eSEdward Tomasz Napierala error = (se->sy_call)(td, sa->args); 16426ccf4f1SKonstantin Belousov /* Save the latest error return value. */ 1654c6f466cSEdward Tomasz Napierala if (__predict_false((td->td_pflags & TDP_NERRNO) != 0)) 1664c6f466cSEdward Tomasz Napierala td->td_pflags &= ~TDP_NERRNO; 1674c6f466cSEdward Tomasz Napierala else 16826ccf4f1SKonstantin Belousov td->td_errno = error; 169275c821dSKyle Evans 170275c821dSKyle Evans /* 171275c821dSKyle Evans * Note that some syscall implementations (e.g., sys_execve) 172275c821dSKyle Evans * will commit the audit record just before their final return. 173275c821dSKyle Evans * These were done under the assumption that nothing of interest 174275c821dSKyle Evans * would happen between their return and here, where we would 175275c821dSKyle Evans * normally commit the audit record. These assumptions will 176275c821dSKyle Evans * need to be revisited should any substantial logic be added 177275c821dSKyle Evans * above. 178275c821dSKyle Evans */ 1792f729243SMateusz Guzik AUDIT_SYSCALL_EXIT(error, td); 180275c821dSKyle Evans 18126ccf4f1SKonstantin Belousov #ifdef KDTRACE_HOOKS 1828ff6d9ddSMark Johnston /* Give the syscall:::return DTrace probe a chance to fire. */ 183bdc0cb4eSEdward Tomasz Napierala if (__predict_false(se->sy_return != 0)) 1848ff6d9ddSMark Johnston (*systrace_probe_func)(sa, SYSTRACE_RETURN, 1858ff6d9ddSMark Johnston error ? -1 : td->td_retval[0]); 18626ccf4f1SKonstantin Belousov #endif 187a1bd83feSEdward Tomasz Napierala 188a1bd83feSEdward Tomasz Napierala if (!sy_thr_static) 189a1bd83feSEdward Tomasz Napierala syscall_thread_exit(td, se); 1902f729243SMateusz Guzik } else { 191bdc0cb4eSEdward Tomasz Napierala error = (se->sy_call)(td, sa->args); 1922f729243SMateusz Guzik /* Save the latest error return value. */ 1934c6f466cSEdward Tomasz Napierala if (__predict_false((td->td_pflags & TDP_NERRNO) != 0)) 1944c6f466cSEdward Tomasz Napierala td->td_pflags &= ~TDP_NERRNO; 1954c6f466cSEdward Tomasz Napierala else 1962f729243SMateusz Guzik td->td_errno = error; 1972f729243SMateusz Guzik } 198c26541e3SJohn Baldwin 19926ccf4f1SKonstantin Belousov retval: 2004c44811cSJeff Roberson KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code), 2017fc3ae51SOleksandr Tymoshenko (uintptr_t)td, "pid:%d", td->td_proc->p_pid, "error:%d", error, 2024c44811cSJeff Roberson "retval0:%#lx", td->td_retval[0], "retval1:%#lx", 2034c44811cSJeff Roberson td->td_retval[1]); 2040e84a878SMateusz Guzik if (__predict_false(traced)) { 20526ccf4f1SKonstantin Belousov PROC_LOCK(p); 206f0592b3cSKonstantin Belousov td->td_dbgflags &= ~(TDB_SCE | TDB_BOUNDARY); 20726ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 20826ccf4f1SKonstantin Belousov } 20926ccf4f1SKonstantin Belousov (p->p_sysent->sv_set_syscall_retval)(td, error); 21026ccf4f1SKonstantin Belousov } 21126ccf4f1SKonstantin Belousov 21226ccf4f1SKonstantin Belousov static inline void 213c18ca749SJohn Baldwin syscallret(struct thread *td) 21426ccf4f1SKonstantin Belousov { 2157d065d87SMateusz Guzik struct proc *p; 2162d88da2fSKonstantin Belousov struct syscall_args *sa; 217643f6f47SKonstantin Belousov ksiginfo_t ksi; 2181af9474bSJohn Baldwin int traced; 21926ccf4f1SKonstantin Belousov 220441eb16aSKonstantin Belousov KASSERT(td->td_errno != ERELOOKUP, 221441eb16aSKonstantin Belousov ("ERELOOKUP not consumed syscall %d", td->td_sa.code)); 222aff57357SEd Schouten 22326ccf4f1SKonstantin Belousov p = td->td_proc; 2242d88da2fSKonstantin Belousov sa = &td->td_sa; 2250e84a878SMateusz Guzik if (__predict_false(td->td_errno == ENOTCAPABLE || 2260e84a878SMateusz Guzik td->td_errno == ECAPMODE)) { 2270e84a878SMateusz Guzik if ((trap_enotcap || 2280e84a878SMateusz Guzik (p->p_flag2 & P2_TRAPCAP) != 0) && IN_CAPABILITY_MODE(td)) { 229643f6f47SKonstantin Belousov ksiginfo_init_trap(&ksi); 230643f6f47SKonstantin Belousov ksi.ksi_signo = SIGTRAP; 2311af9474bSJohn Baldwin ksi.ksi_errno = td->td_errno; 232643f6f47SKonstantin Belousov ksi.ksi_code = TRAP_CAP; 233cf98bc28SDavid Chisnall ksi.ksi_info.si_syscall = sa->original_code; 234643f6f47SKonstantin Belousov trapsignal(td, &ksi); 235643f6f47SKonstantin Belousov } 236643f6f47SKonstantin Belousov } 23726ccf4f1SKonstantin Belousov 23826ccf4f1SKonstantin Belousov /* 23926ccf4f1SKonstantin Belousov * Handle reschedule and other end-of-syscall issues 24026ccf4f1SKonstantin Belousov */ 24126ccf4f1SKonstantin Belousov userret(td, td->td_frame); 24226ccf4f1SKonstantin Belousov 24326ccf4f1SKonstantin Belousov #ifdef KTRACE 2442dd9ea6fSKonstantin Belousov if (KTRPOINT(td, KTR_SYSRET)) { 2451af9474bSJohn Baldwin ktrsysret(sa->code, td->td_errno, td->td_retval[0]); 2462dd9ea6fSKonstantin Belousov } 24726ccf4f1SKonstantin Belousov #endif 24826ccf4f1SKonstantin Belousov 2490e84a878SMateusz Guzik traced = 0; 2500e84a878SMateusz Guzik if (__predict_false(p->p_flag & P_TRACED)) { 25126ccf4f1SKonstantin Belousov traced = 1; 25226ccf4f1SKonstantin Belousov PROC_LOCK(p); 25326ccf4f1SKonstantin Belousov td->td_dbgflags |= TDB_SCX; 25426ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 2550e84a878SMateusz Guzik } 2560e84a878SMateusz Guzik if (__predict_false(traced || 2570e84a878SMateusz Guzik (td->td_dbgflags & (TDB_EXEC | TDB_FORK)) != 0)) { 25826ccf4f1SKonstantin Belousov PROC_LOCK(p); 259ce8bd78bSKonstantin Belousov /* 2606e66030cSEdward Tomasz Napierala * Linux debuggers expect an additional stop for exec, 2616e66030cSEdward Tomasz Napierala * between the usual syscall entry and exit. Raise 2626e66030cSEdward Tomasz Napierala * the exec event now and then clear TDB_EXEC so that 2636e66030cSEdward Tomasz Napierala * the next stop is reported as a syscall exit by 2646e66030cSEdward Tomasz Napierala * linux_ptrace_status(). 2658bbc0600SEdward Tomasz Napierala * 2668bbc0600SEdward Tomasz Napierala * We are accessing p->p_pptr without any additional 2678bbc0600SEdward Tomasz Napierala * locks here: it cannot change while p is kept locked; 2688bbc0600SEdward Tomasz Napierala * while the debugger could in theory change its ABI 2698bbc0600SEdward Tomasz Napierala * while tracing another process, the outcome of such 2708bbc0600SEdward Tomasz Napierala * a race wouln't be deterministic anyway. 2716e66030cSEdward Tomasz Napierala */ 2728bbc0600SEdward Tomasz Napierala if (traced && (td->td_dbgflags & TDB_EXEC) != 0 && 2738bbc0600SEdward Tomasz Napierala SV_PROC_ABI(p->p_pptr) == SV_ABI_LINUX) { 2746e66030cSEdward Tomasz Napierala ptracestop(td, SIGTRAP, NULL); 2756e66030cSEdward Tomasz Napierala td->td_dbgflags &= ~TDB_EXEC; 2766e66030cSEdward Tomasz Napierala } 2776e66030cSEdward Tomasz Napierala /* 278ce8bd78bSKonstantin Belousov * If tracing the execed process, trap to the debugger 279ce8bd78bSKonstantin Belousov * so that breakpoints can be set before the program 280ce8bd78bSKonstantin Belousov * executes. If debugger requested tracing of syscall 281ce8bd78bSKonstantin Belousov * returns, do it now too. 282ce8bd78bSKonstantin Belousov */ 2836ad1ff09SKonstantin Belousov if (traced && 2846ad1ff09SKonstantin Belousov ((td->td_dbgflags & (TDB_FORK | TDB_EXEC)) != 0 || 285f0592b3cSKonstantin Belousov (p->p_ptevents & PTRACE_SCX) != 0)) { 286f0592b3cSKonstantin Belousov MPASS((td->td_dbgflags & TDB_BOUNDARY) == 0); 287f0592b3cSKonstantin Belousov td->td_dbgflags |= TDB_BOUNDARY; 28882a4538fSEric Badger ptracestop(td, SIGTRAP, NULL); 289f0592b3cSKonstantin Belousov } 290f0592b3cSKonstantin Belousov td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK | 291f0592b3cSKonstantin Belousov TDB_BOUNDARY); 29226ccf4f1SKonstantin Belousov PROC_UNLOCK(p); 29326ccf4f1SKonstantin Belousov } 29426ccf4f1SKonstantin Belousov } 295