10Sstevel@tonic-gate/* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 52712Snn35248 * Common Development and Distribution License (the "License"). 62712Snn35248 * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate/* 22*12613SSurya.Prakki@Sun.COM * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 230Sstevel@tonic-gate */ 240Sstevel@tonic-gate 250Sstevel@tonic-gate/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ 260Sstevel@tonic-gate/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ 270Sstevel@tonic-gate/* All Rights Reserved */ 280Sstevel@tonic-gate 290Sstevel@tonic-gate/* Copyright (c) 1987, 1988 Microsoft Corporation */ 300Sstevel@tonic-gate/* All Rights Reserved */ 310Sstevel@tonic-gate 320Sstevel@tonic-gate#include <sys/asm_linkage.h> 330Sstevel@tonic-gate#include <sys/asm_misc.h> 340Sstevel@tonic-gate#include <sys/regset.h> 350Sstevel@tonic-gate#include <sys/psw.h> 360Sstevel@tonic-gate#include <sys/x86_archext.h> 372712Snn35248#include <sys/machbrand.h> 383446Smrj#include <sys/privregs.h> 390Sstevel@tonic-gate 400Sstevel@tonic-gate#if defined(__lint) 410Sstevel@tonic-gate 420Sstevel@tonic-gate#include <sys/types.h> 430Sstevel@tonic-gate#include <sys/thread.h> 440Sstevel@tonic-gate#include <sys/systm.h> 450Sstevel@tonic-gate 460Sstevel@tonic-gate#else /* __lint */ 470Sstevel@tonic-gate 480Sstevel@tonic-gate#include <sys/segments.h> 490Sstevel@tonic-gate#include <sys/pcb.h> 500Sstevel@tonic-gate#include <sys/trap.h> 510Sstevel@tonic-gate#include <sys/ftrace.h> 520Sstevel@tonic-gate#include <sys/traptrace.h> 530Sstevel@tonic-gate#include <sys/clock.h> 540Sstevel@tonic-gate#include <sys/panic.h> 550Sstevel@tonic-gate#include "assym.h" 560Sstevel@tonic-gate 570Sstevel@tonic-gate#endif /* __lint */ 580Sstevel@tonic-gate 590Sstevel@tonic-gate/* 600Sstevel@tonic-gate * We implement two flavours of system call entry points 610Sstevel@tonic-gate * 620Sstevel@tonic-gate * - {int,lcall}/iret (i386) 630Sstevel@tonic-gate * - sysenter/sysexit (Pentium II and beyond) 640Sstevel@tonic-gate * 650Sstevel@tonic-gate * The basic pattern used in the handlers is to check to see if we can 660Sstevel@tonic-gate * do fast (simple) version of the system call; if we can't we use various 670Sstevel@tonic-gate * C routines that handle corner cases and debugging. 680Sstevel@tonic-gate * 690Sstevel@tonic-gate * To reduce the amount of assembler replication, yet keep the system call 700Sstevel@tonic-gate * implementations vaguely comprehensible, the common code in the body 710Sstevel@tonic-gate * of the handlers is broken up into a set of preprocessor definitions 720Sstevel@tonic-gate * below. 730Sstevel@tonic-gate */ 740Sstevel@tonic-gate 750Sstevel@tonic-gate/* 760Sstevel@tonic-gate * When we have SYSCALLTRACE defined, we sneak an extra 770Sstevel@tonic-gate * predicate into a couple of tests. 780Sstevel@tonic-gate */ 790Sstevel@tonic-gate#if defined(SYSCALLTRACE) 800Sstevel@tonic-gate#define ORL_SYSCALLTRACE(r32) \ 810Sstevel@tonic-gate orl syscalltrace, r32 820Sstevel@tonic-gate#else 830Sstevel@tonic-gate#define ORL_SYSCALLTRACE(r32) 840Sstevel@tonic-gate#endif 850Sstevel@tonic-gate 860Sstevel@tonic-gate/* 870Sstevel@tonic-gate * This check is false whenever we want to go fast i.e. 880Sstevel@tonic-gate * 890Sstevel@tonic-gate * if (code >= NSYSCALL || 900Sstevel@tonic-gate * t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0) 910Sstevel@tonic-gate * do full version 920Sstevel@tonic-gate * #ifdef SYSCALLTRACE 930Sstevel@tonic-gate * if (syscalltrace) 940Sstevel@tonic-gate * do full version 950Sstevel@tonic-gate * #endif 960Sstevel@tonic-gate * 970Sstevel@tonic-gate * Preconditions: 980Sstevel@tonic-gate * - t curthread 990Sstevel@tonic-gate * - code contains the syscall number 1000Sstevel@tonic-gate * Postconditions: 1010Sstevel@tonic-gate * - %ecx and %edi are smashed 1020Sstevel@tonic-gate * - condition code flag ZF is cleared if pre-sys is too complex 1030Sstevel@tonic-gate */ 1040Sstevel@tonic-gate#define CHECK_PRESYS_NE(t, code) \ 1050Sstevel@tonic-gate movzbl T_PRE_SYS(t), %edi; \ 1060Sstevel@tonic-gate movzwl T_PROC_FLAG(t), %ecx; \ 1070Sstevel@tonic-gate andl $TP_WATCHPT, %ecx; \ 1080Sstevel@tonic-gate orl %ecx, %edi; \ 1090Sstevel@tonic-gate cmpl $NSYSCALL, code; \ 1100Sstevel@tonic-gate setae %cl; \ 1110Sstevel@tonic-gate movzbl %cl, %ecx; \ 1120Sstevel@tonic-gate orl %ecx, %edi; \ 1130Sstevel@tonic-gate ORL_SYSCALLTRACE(%edi) 1140Sstevel@tonic-gate 1152712Snn35248/* 11610395Sgerald.jelinek@sun.com * Check if a brand_mach_ops callback is defined for the specified callback_id 11711685Sgerald.jelinek@sun.com * type. If so invoke it with the user's %gs value loaded and the following 11810395Sgerald.jelinek@sun.com * data on the stack: 1192712Snn35248 * -------------------------------------- 12011685Sgerald.jelinek@sun.com * | user's %ss | 12111685Sgerald.jelinek@sun.com * | | user's %esp | 12211685Sgerald.jelinek@sun.com * | | EFLAGS register | 12311685Sgerald.jelinek@sun.com * | | user's %cs | 12411685Sgerald.jelinek@sun.com * | | user's %eip (user return address) | 12511685Sgerald.jelinek@sun.com * | | 'scratch space' | 12611685Sgerald.jelinek@sun.com * | | user's %ebx | 12711685Sgerald.jelinek@sun.com * | | user's %gs selector | 12811685Sgerald.jelinek@sun.com * v | lwp pointer | 1292712Snn35248 * | callback wrapper return addr | 1302712Snn35248 * -------------------------------------- 1312712Snn35248 * 1322712Snn35248 * If the brand code returns, we assume that we are meant to execute the 1332712Snn35248 * normal system call path. 13411685Sgerald.jelinek@sun.com * 13511685Sgerald.jelinek@sun.com * The interface to the brand callbacks on the 32-bit kernel assumes %ebx 13611685Sgerald.jelinek@sun.com * is available as a scratch register within the callback. If the callback 13711685Sgerald.jelinek@sun.com * returns within the kernel then this macro will restore %ebx. If the 13811685Sgerald.jelinek@sun.com * callback is going to return directly to userland then it should restore 13911685Sgerald.jelinek@sun.com * %ebx before returning to userland. 1402712Snn35248 */ 1412712Snn35248#define BRAND_CALLBACK(callback_id) \ 1422712Snn35248 subl $4, %esp /* save some scratch space */ ;\ 1432712Snn35248 pushl %ebx /* save %ebx to use for scratch */ ;\ 1442712Snn35248 pushl %gs /* save the user %gs */ ;\ 1452712Snn35248 movl $KGS_SEL, %ebx ;\ 1462744Snn35248 movw %bx, %gs /* switch to the kernel's %gs */ ;\ 1472712Snn35248 movl %gs:CPU_THREAD, %ebx /* load the thread pointer */ ;\ 1482712Snn35248 movl T_LWP(%ebx), %ebx /* load the lwp pointer */ ;\ 1495144Ssp92102 pushl %ebx /* push the lwp pointer */ ;\ 1502712Snn35248 movl LWP_PROCP(%ebx), %ebx /* load the proc pointer */ ;\ 1512712Snn35248 movl P_BRAND(%ebx), %ebx /* load the brand pointer */ ;\ 1522712Snn35248 movl B_MACHOPS(%ebx), %ebx /* load the machops pointer */ ;\ 1532712Snn35248 movl _CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx ;\ 1542712Snn35248 cmpl $0, %ebx ;\ 1552712Snn35248 je 1f ;\ 15611685Sgerald.jelinek@sun.com movl %ebx, 12(%esp) /* save callback to scratch */ ;\ 15711685Sgerald.jelinek@sun.com movl 4(%esp), %ebx /* grab the user %gs */ ;\ 1582744Snn35248 movw %bx, %gs /* restore the user %gs */ ;\ 15911685Sgerald.jelinek@sun.com call *12(%esp) /* call callback in scratch */ ;\ 16011685Sgerald.jelinek@sun.com1: movl 4(%esp), %ebx /* restore user %gs (re-do if */ ;\ 16111685Sgerald.jelinek@sun.com movw %bx, %gs /* branch due to no callback) */ ;\ 16211685Sgerald.jelinek@sun.com movl 8(%esp), %ebx /* restore user's %ebx */ ;\ 16311685Sgerald.jelinek@sun.com addl $16, %esp /* restore stack ptr */ 1642712Snn35248 1650Sstevel@tonic-gate#define MSTATE_TRANSITION(from, to) \ 1660Sstevel@tonic-gate pushl $to; \ 1670Sstevel@tonic-gate pushl $from; \ 1680Sstevel@tonic-gate call syscall_mstate; \ 1690Sstevel@tonic-gate addl $0x8, %esp 1700Sstevel@tonic-gate 1710Sstevel@tonic-gate/* 1720Sstevel@tonic-gate * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1) 1730Sstevel@tonic-gate * This must be called with interrupts or preemption disabled. 1740Sstevel@tonic-gate */ 1750Sstevel@tonic-gate#define CPU_STATS_SYS_SYSCALL_INC \ 1760Sstevel@tonic-gate addl $1, %gs:CPU_STATS_SYS_SYSCALL; \ 1770Sstevel@tonic-gate adcl $0, %gs:CPU_STATS_SYS_SYSCALL+4; 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate#if !defined(__lint) 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate/* 1820Sstevel@tonic-gate * ASSERT(lwptoregs(lwp) == rp); 1830Sstevel@tonic-gate * 1840Sstevel@tonic-gate * this may seem obvious, but very odd things happen if this 1850Sstevel@tonic-gate * assertion is false 1860Sstevel@tonic-gate * 1870Sstevel@tonic-gate * Preconditions: 1880Sstevel@tonic-gate * -none- 1890Sstevel@tonic-gate * Postconditions (if assertion is true): 1900Sstevel@tonic-gate * %esi and %edi are smashed 1910Sstevel@tonic-gate */ 1920Sstevel@tonic-gate#if defined(DEBUG) 1930Sstevel@tonic-gate 1940Sstevel@tonic-gate__lwptoregs_msg: 1957542SRichard.Bean@Sun.COM .string "syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]" 1960Sstevel@tonic-gate 1970Sstevel@tonic-gate#define ASSERT_LWPTOREGS(t, rp) \ 1980Sstevel@tonic-gate movl T_LWP(t), %esi; \ 1990Sstevel@tonic-gate movl LWP_REGS(%esi), %edi; \ 2000Sstevel@tonic-gate cmpl rp, %edi; \ 2010Sstevel@tonic-gate je 7f; \ 2020Sstevel@tonic-gate pushl rp; \ 2030Sstevel@tonic-gate pushl %edi; \ 2040Sstevel@tonic-gate pushl %esi; \ 2050Sstevel@tonic-gate pushl $__LINE__; \ 2060Sstevel@tonic-gate pushl $__lwptoregs_msg; \ 2070Sstevel@tonic-gate call panic; \ 2080Sstevel@tonic-gate7: 2090Sstevel@tonic-gate#else 2100Sstevel@tonic-gate#define ASSERT_LWPTOREGS(t, rp) 2110Sstevel@tonic-gate#endif 2120Sstevel@tonic-gate 2130Sstevel@tonic-gate#endif /* __lint */ 2140Sstevel@tonic-gate 2150Sstevel@tonic-gate/* 2160Sstevel@tonic-gate * This is an assembler version of this fragment: 2170Sstevel@tonic-gate * 2180Sstevel@tonic-gate * lwp->lwp_state = LWP_SYS; 2190Sstevel@tonic-gate * lwp->lwp_ru.sysc++; 2200Sstevel@tonic-gate * lwp->lwp_eosys = NORMALRETURN; 2210Sstevel@tonic-gate * lwp->lwp_ap = argp; 2220Sstevel@tonic-gate * 2230Sstevel@tonic-gate * Preconditions: 2240Sstevel@tonic-gate * -none- 2250Sstevel@tonic-gate * Postconditions: 2260Sstevel@tonic-gate * -none- 2270Sstevel@tonic-gate */ 2280Sstevel@tonic-gate#define SET_LWP(lwp, argp) \ 2290Sstevel@tonic-gate movb $LWP_SYS, LWP_STATE(lwp); \ 2300Sstevel@tonic-gate addl $1, LWP_RU_SYSC(lwp); \ 2310Sstevel@tonic-gate adcl $0, LWP_RU_SYSC+4(lwp); \ 2320Sstevel@tonic-gate movb $NORMALRETURN, LWP_EOSYS(lwp); \ 2330Sstevel@tonic-gate movl argp, LWP_AP(lwp) 2340Sstevel@tonic-gate 2350Sstevel@tonic-gate/* 2360Sstevel@tonic-gate * Set up the thread, lwp, find the handler, and copy 2370Sstevel@tonic-gate * in the arguments from userland to the kernel stack. 2380Sstevel@tonic-gate * 2390Sstevel@tonic-gate * Preconditions: 2400Sstevel@tonic-gate * - %eax contains the syscall number 2410Sstevel@tonic-gate * Postconditions: 2420Sstevel@tonic-gate * - %eax contains a pointer to the sysent structure 2430Sstevel@tonic-gate * - %ecx is zeroed 2440Sstevel@tonic-gate * - %esi, %edi are smashed 2450Sstevel@tonic-gate * - %esp is SYS_DROPped ready for the syscall 2460Sstevel@tonic-gate */ 2470Sstevel@tonic-gate#define SIMPLE_SYSCALL_PRESYS(t, faultlabel) \ 2480Sstevel@tonic-gate movl T_LWP(t), %esi; \ 2490Sstevel@tonic-gate movw %ax, T_SYSNUM(t); \ 2500Sstevel@tonic-gate subl $SYS_DROP, %esp; \ 2510Sstevel@tonic-gate shll $SYSENT_SIZE_SHIFT, %eax; \ 2520Sstevel@tonic-gate SET_LWP(%esi, %esp); \ 2530Sstevel@tonic-gate leal sysent(%eax), %eax; \ 2540Sstevel@tonic-gate movzbl SY_NARG(%eax), %ecx; \ 2550Sstevel@tonic-gate testl %ecx, %ecx; \ 2560Sstevel@tonic-gate jz 4f; \ 2570Sstevel@tonic-gate movl %esp, %edi; \ 2580Sstevel@tonic-gate movl SYS_DROP + REGOFF_UESP(%esp), %esi; \ 2590Sstevel@tonic-gate movl $faultlabel, T_LOFAULT(t); \ 2600Sstevel@tonic-gate addl $4, %esi; \ 2610Sstevel@tonic-gate rep; \ 2620Sstevel@tonic-gate smovl; \ 2630Sstevel@tonic-gate movl %ecx, T_LOFAULT(t); \ 2640Sstevel@tonic-gate4: 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate/* 2670Sstevel@tonic-gate * Check to see if a simple return is possible i.e. 2680Sstevel@tonic-gate * 2690Sstevel@tonic-gate * if ((t->t_post_sys_ast | syscalltrace) != 0) 2700Sstevel@tonic-gate * do full version; 2710Sstevel@tonic-gate * 2720Sstevel@tonic-gate * Preconditions: 2730Sstevel@tonic-gate * - t is curthread 2740Sstevel@tonic-gate * Postconditions: 2750Sstevel@tonic-gate * - condition code NE is set if post-sys is too complex 2760Sstevel@tonic-gate * - rtmp is zeroed if it isn't (we rely on this!) 2770Sstevel@tonic-gate */ 2780Sstevel@tonic-gate#define CHECK_POSTSYS_NE(t, rtmp) \ 2790Sstevel@tonic-gate xorl rtmp, rtmp; \ 2800Sstevel@tonic-gate ORL_SYSCALLTRACE(rtmp); \ 2810Sstevel@tonic-gate orl T_POST_SYS_AST(t), rtmp; \ 2820Sstevel@tonic-gate cmpl $0, rtmp 2830Sstevel@tonic-gate 2840Sstevel@tonic-gate/* 2850Sstevel@tonic-gate * Fix up the lwp, thread, and eflags for a successful return 2860Sstevel@tonic-gate * 2870Sstevel@tonic-gate * Preconditions: 2880Sstevel@tonic-gate * - zwreg contains zero 2890Sstevel@tonic-gate * Postconditions: 2900Sstevel@tonic-gate * - %esp has been unSYS_DROPped 2910Sstevel@tonic-gate * - %esi is smashed (points to lwp) 2920Sstevel@tonic-gate */ 2930Sstevel@tonic-gate#define SIMPLE_SYSCALL_POSTSYS(t, zwreg) \ 2940Sstevel@tonic-gate movl T_LWP(t), %esi; \ 2950Sstevel@tonic-gate addl $SYS_DROP, %esp; \ 2960Sstevel@tonic-gate movw zwreg, T_SYSNUM(t); \ 2970Sstevel@tonic-gate movb $LWP_USER, LWP_STATE(%esi); \ 2980Sstevel@tonic-gate andb $_CONST(0xffff - PS_C), REGOFF_EFL(%esp) 2990Sstevel@tonic-gate 3000Sstevel@tonic-gate/* 3010Sstevel@tonic-gate * System call handler. This is the destination of both the call 3020Sstevel@tonic-gate * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes, 3030Sstevel@tonic-gate * there are two significant differences between an interrupt gate and a call 3040Sstevel@tonic-gate * gate: 3050Sstevel@tonic-gate * 3060Sstevel@tonic-gate * 1) An interrupt gate runs the handler with interrupts disabled, whereas a 3070Sstevel@tonic-gate * call gate runs the handler with whatever EFLAGS settings were in effect at 3080Sstevel@tonic-gate * the time of the call. 3090Sstevel@tonic-gate * 3100Sstevel@tonic-gate * 2) An interrupt gate pushes the contents of the EFLAGS register at the time 3110Sstevel@tonic-gate * of the interrupt onto the stack, whereas a call gate does not. 3120Sstevel@tonic-gate * 3130Sstevel@tonic-gate * Because we use the following code sequence to handle system calls made from 3140Sstevel@tonic-gate * _both_ a call gate _and_ an interrupt gate, these two differences must be 3150Sstevel@tonic-gate * respected. In regards to number 1) above, the handler must ensure that a sane 3160Sstevel@tonic-gate * EFLAGS snapshot is stored on the stack so that when the kernel returns back 3170Sstevel@tonic-gate * to the user via iret (which returns to user with the EFLAGS value saved on 3180Sstevel@tonic-gate * the stack), interrupts are re-enabled. 3190Sstevel@tonic-gate * 3200Sstevel@tonic-gate * In regards to number 2) above, the handler must always put a current snapshot 3210Sstevel@tonic-gate * of EFLAGS onto the stack in the appropriate place. If we came in via an 3220Sstevel@tonic-gate * interrupt gate, we will be clobbering the EFLAGS value that was pushed by 3230Sstevel@tonic-gate * the interrupt gate. This is OK, as the only bit that was changed by the 3240Sstevel@tonic-gate * hardware was the IE (interrupt enable) bit, which for an interrupt gate is 3250Sstevel@tonic-gate * now off. If we were to do nothing, the stack would contain an EFLAGS with 3260Sstevel@tonic-gate * IE off, resulting in us eventually returning back to the user with interrupts 3270Sstevel@tonic-gate * disabled. The solution is to turn on the IE bit in the EFLAGS value saved on 3280Sstevel@tonic-gate * the stack. 3290Sstevel@tonic-gate * 3300Sstevel@tonic-gate * Another subtlety which deserves mention is the difference between the two 3310Sstevel@tonic-gate * descriptors. The call gate descriptor is set to instruct the hardware to copy 3320Sstevel@tonic-gate * one parameter from the user stack to the kernel stack, whereas the interrupt 3330Sstevel@tonic-gate * gate descriptor doesn't use the parameter passing mechanism at all. The 3340Sstevel@tonic-gate * kernel doesn't actually use the parameter that is copied by the hardware; the 3350Sstevel@tonic-gate * only reason it does this is so that there is a space on the stack large 3360Sstevel@tonic-gate * enough to hold an EFLAGS register value, which happens to be in the correct 3370Sstevel@tonic-gate * place for use by iret when we go back to userland. How convenient. 3380Sstevel@tonic-gate * 3390Sstevel@tonic-gate * Stack frame description in syscall() and callees. 3400Sstevel@tonic-gate * 3410Sstevel@tonic-gate * |------------| 3420Sstevel@tonic-gate * | regs | +(8*4)+4 registers 3430Sstevel@tonic-gate * |------------| 3440Sstevel@tonic-gate * | 8 args | <- %esp MAXSYSARGS (currently 8) arguments 3450Sstevel@tonic-gate * |------------| 3460Sstevel@tonic-gate * 3470Sstevel@tonic-gate */ 3480Sstevel@tonic-gate#define SYS_DROP _CONST(_MUL(MAXSYSARGS, 4)) 3490Sstevel@tonic-gate 3500Sstevel@tonic-gate#if defined(__lint) 3510Sstevel@tonic-gate 3520Sstevel@tonic-gate/*ARGSUSED*/ 3530Sstevel@tonic-gatevoid 3540Sstevel@tonic-gatesys_call() 3550Sstevel@tonic-gate{} 3560Sstevel@tonic-gate 3570Sstevel@tonic-gatevoid 3580Sstevel@tonic-gate_allsyscalls() 3590Sstevel@tonic-gate{} 3600Sstevel@tonic-gate 3610Sstevel@tonic-gatesize_t _allsyscalls_size; 3620Sstevel@tonic-gate 3630Sstevel@tonic-gate#else /* __lint */ 3640Sstevel@tonic-gate 3652712Snn35248 ENTRY_NP2(brand_sys_call, _allsyscalls) 3662712Snn35248 BRAND_CALLBACK(BRAND_CB_SYSCALL) 3670Sstevel@tonic-gate 3682712Snn35248 ALTENTRY(sys_call) 3690Sstevel@tonic-gate / on entry eax = system call number 3702712Snn35248 3710Sstevel@tonic-gate / set up the stack to look as in reg.h 3720Sstevel@tonic-gate subl $8, %esp / pad the stack with ERRCODE and TRAPNO 3730Sstevel@tonic-gate 3740Sstevel@tonic-gate SYSCALL_PUSH 3750Sstevel@tonic-gate 3760Sstevel@tonic-gate#ifdef TRAPTRACE 3770Sstevel@tonic-gate TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9" 3780Sstevel@tonic-gate TRACE_REGS(%edi, %esp, %ebx, %ecx) / Uses label "9" 3790Sstevel@tonic-gate pushl %eax 3800Sstevel@tonic-gate TRACE_STAMP(%edi) / Clobbers %eax, %edx, uses "9" 3810Sstevel@tonic-gate popl %eax 3820Sstevel@tonic-gate movl %eax, TTR_SYSNUM(%edi) 3830Sstevel@tonic-gate#endif 3840Sstevel@tonic-gate 3850Sstevel@tonic-gate_watch_do_syscall: 3860Sstevel@tonic-gate movl %esp, %ebp 3870Sstevel@tonic-gate 3883446Smrj / Interrupts may be enabled here, so we must make sure this thread 3893446Smrj / doesn't migrate off the CPU while it updates the CPU stats. 3903446Smrj / 3913446Smrj / XXX This is only true if we got here via call gate thru the LDT for 3923446Smrj / old style syscalls. Perhaps this preempt++-- will go away soon? 3933446Smrj movl %gs:CPU_THREAD, %ebx 3943446Smrj addb $1, T_PREEMPT(%ebx) 3953446Smrj CPU_STATS_SYS_SYSCALL_INC 3963446Smrj subb $1, T_PREEMPT(%ebx) 3973446Smrj 3983446Smrj ENABLE_INTR_FLAGS 3993446Smrj 4000Sstevel@tonic-gate pushl %eax / preserve across mstate call 4010Sstevel@tonic-gate MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM) 4020Sstevel@tonic-gate popl %eax 4030Sstevel@tonic-gate 4040Sstevel@tonic-gate movl %gs:CPU_THREAD, %ebx 4050Sstevel@tonic-gate 4060Sstevel@tonic-gate ASSERT_LWPTOREGS(%ebx, %esp) 4070Sstevel@tonic-gate 4080Sstevel@tonic-gate CHECK_PRESYS_NE(%ebx, %eax) 4090Sstevel@tonic-gate jne _full_syscall_presys 4100Sstevel@tonic-gate SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault) 4110Sstevel@tonic-gate 4120Sstevel@tonic-gate_syslcall_call: 4130Sstevel@tonic-gate call *SY_CALLC(%eax) 4140Sstevel@tonic-gate 4150Sstevel@tonic-gate_syslcall_done: 4160Sstevel@tonic-gate CHECK_POSTSYS_NE(%ebx, %ecx) 4170Sstevel@tonic-gate jne _full_syscall_postsys 4180Sstevel@tonic-gate SIMPLE_SYSCALL_POSTSYS(%ebx, %cx) 4190Sstevel@tonic-gate movl %eax, REGOFF_EAX(%esp) 4200Sstevel@tonic-gate movl %edx, REGOFF_EDX(%esp) 4210Sstevel@tonic-gate 4220Sstevel@tonic-gate MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER) 4230Sstevel@tonic-gate 4240Sstevel@tonic-gate / 4250Sstevel@tonic-gate / get back via iret 4260Sstevel@tonic-gate / 4273446Smrj CLI(%edx) 4283446Smrj jmp sys_rtt_syscall 4290Sstevel@tonic-gate 4300Sstevel@tonic-gate_full_syscall_presys: 4310Sstevel@tonic-gate movl T_LWP(%ebx), %esi 4320Sstevel@tonic-gate subl $SYS_DROP, %esp 4330Sstevel@tonic-gate movb $LWP_SYS, LWP_STATE(%esi) 4340Sstevel@tonic-gate pushl %esp 4350Sstevel@tonic-gate pushl %ebx 4360Sstevel@tonic-gate call syscall_entry 4370Sstevel@tonic-gate addl $8, %esp 4380Sstevel@tonic-gate jmp _syslcall_call 4390Sstevel@tonic-gate 4400Sstevel@tonic-gate_full_syscall_postsys: 4410Sstevel@tonic-gate addl $SYS_DROP, %esp 4420Sstevel@tonic-gate pushl %edx 4430Sstevel@tonic-gate pushl %eax 4440Sstevel@tonic-gate pushl %ebx 4450Sstevel@tonic-gate call syscall_exit 4460Sstevel@tonic-gate addl $12, %esp 4470Sstevel@tonic-gate MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER) 4483446Smrj jmp _sys_rtt 4490Sstevel@tonic-gate 4500Sstevel@tonic-gate_syscall_fault: 4510Sstevel@tonic-gate push $0xe / EFAULT 4520Sstevel@tonic-gate call set_errno 4530Sstevel@tonic-gate addl $4, %esp 4540Sstevel@tonic-gate xorl %eax, %eax / fake syscall_err() 4550Sstevel@tonic-gate xorl %edx, %edx 4560Sstevel@tonic-gate jmp _syslcall_done 4570Sstevel@tonic-gate SET_SIZE(sys_call) 4582712Snn35248 SET_SIZE(brand_sys_call) 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate#endif /* __lint */ 4610Sstevel@tonic-gate 4620Sstevel@tonic-gate/* 4630Sstevel@tonic-gate * System call handler via the sysenter instruction 4640Sstevel@tonic-gate * 4650Sstevel@tonic-gate * Here's how syscall entry usually works (see sys_call for details). 4660Sstevel@tonic-gate * 4670Sstevel@tonic-gate * There, the caller (lcall or int) in userland has arranged that: 4680Sstevel@tonic-gate * 4690Sstevel@tonic-gate * - %eax contains the syscall number 4700Sstevel@tonic-gate * - the user stack contains the args to the syscall 4710Sstevel@tonic-gate * 4720Sstevel@tonic-gate * Normally the lcall instruction into the call gate causes the processor 4730Sstevel@tonic-gate * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack. 4740Sstevel@tonic-gate * The sys_call handler then leaves space for r_trapno and r_err, and 4750Sstevel@tonic-gate * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed 4760Sstevel@tonic-gate * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack. 4770Sstevel@tonic-gate * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally 4780Sstevel@tonic-gate * extracts %efl and puts it into r_efl (which happens to live at the offset 4790Sstevel@tonic-gate * that <top-of-stack> was copied into). Note that the value in r_efl has 4800Sstevel@tonic-gate * the IF (interrupt enable) flag turned on. (The int instruction into the 4810Sstevel@tonic-gate * interrupt gate does essentially the same thing, only instead of 4820Sstevel@tonic-gate * <top-of-stack> we get eflags - see comment above.) 4830Sstevel@tonic-gate * 4840Sstevel@tonic-gate * In the sysenter case, things are a lot more primitive. 4850Sstevel@tonic-gate * 4860Sstevel@tonic-gate * The caller in userland has arranged that: 4870Sstevel@tonic-gate * 4880Sstevel@tonic-gate * - %eax contains the syscall number 4890Sstevel@tonic-gate * - %ecx contains the user %esp 4900Sstevel@tonic-gate * - %edx contains the return %eip 4910Sstevel@tonic-gate * - the user stack contains the args to the syscall 4920Sstevel@tonic-gate * 4930Sstevel@tonic-gate * e.g. 4940Sstevel@tonic-gate * <args on the stack> 4950Sstevel@tonic-gate * mov $SYS_callnum, %eax 4960Sstevel@tonic-gate * mov $1f, %edx / return %eip 4970Sstevel@tonic-gate * mov %esp, %ecx / return %esp 4980Sstevel@tonic-gate * sysenter 4990Sstevel@tonic-gate * 1: 5000Sstevel@tonic-gate * 5010Sstevel@tonic-gate * Hardware and (privileged) initialization code have arranged that by 5020Sstevel@tonic-gate * the time the sysenter instructions completes: 5030Sstevel@tonic-gate * 5040Sstevel@tonic-gate * - %eip is pointing to sys_sysenter (below). 5050Sstevel@tonic-gate * - %cs and %ss are set to kernel text and stack (data) selectors. 5060Sstevel@tonic-gate * - %esp is pointing at the lwp's stack 5070Sstevel@tonic-gate * - Interrupts have been disabled. 5080Sstevel@tonic-gate * 5090Sstevel@tonic-gate * The task for the sysenter handler is: 5100Sstevel@tonic-gate * 5110Sstevel@tonic-gate * - recreate the same regs structure on the stack and the same 5120Sstevel@tonic-gate * kernel state as if we'd come in on an lcall 5130Sstevel@tonic-gate * - do the normal work of a syscall 5140Sstevel@tonic-gate * - execute the system call epilogue, use sysexit to return to userland. 5150Sstevel@tonic-gate * 5160Sstevel@tonic-gate * Note that we are unable to return both "rvals" to userland with this 5170Sstevel@tonic-gate * call, as %edx is used by the sysexit instruction. 5182712Snn35248 * 5192712Snn35248 * One final complication in this routine is its interaction with 5202712Snn35248 * single-stepping in a debugger. For most of the system call mechanisms, 5212712Snn35248 * the CPU automatically clears the single-step flag before we enter the 5222712Snn35248 * kernel. The sysenter mechanism does not clear the flag, so a user 5232712Snn35248 * single-stepping through a libc routine may suddenly find him/herself 5242712Snn35248 * single-stepping through the kernel. To detect this, kmdb compares the 5252712Snn35248 * trap %pc to the [brand_]sys_enter addresses on each single-step trap. 5262712Snn35248 * If it finds that we have single-stepped to a sysenter entry point, it 5272712Snn35248 * explicitly clears the flag and executes the sys_sysenter routine. 5282712Snn35248 * 5292712Snn35248 * One final complication in this final complication is the fact that we 5302712Snn35248 * have two different entry points for sysenter: brand_sys_sysenter and 5312712Snn35248 * sys_sysenter. If we enter at brand_sys_sysenter and start single-stepping 5322712Snn35248 * through the kernel with kmdb, we will eventually hit the instruction at 5332712Snn35248 * sys_sysenter. kmdb cannot distinguish between that valid single-step 5342712Snn35248 * and the undesirable one mentioned above. To avoid this situation, we 5352712Snn35248 * simply add a jump over the instruction at sys_sysenter to make it 5362712Snn35248 * impossible to single-step to it. 5370Sstevel@tonic-gate */ 5380Sstevel@tonic-gate#if defined(__lint) 5390Sstevel@tonic-gate 5400Sstevel@tonic-gatevoid 5410Sstevel@tonic-gatesys_sysenter() 5420Sstevel@tonic-gate{} 5430Sstevel@tonic-gate 5440Sstevel@tonic-gate#else /* __lint */ 5450Sstevel@tonic-gate 5462712Snn35248 ENTRY_NP(brand_sys_sysenter) 5472712Snn35248 pushl %edx 5482712Snn35248 BRAND_CALLBACK(BRAND_CB_SYSENTER) 5492712Snn35248 popl %edx 5502712Snn35248 /* 5512712Snn35248 * Jump over sys_sysenter to allow single-stepping as described 5522712Snn35248 * above. 5532712Snn35248 */ 5542712Snn35248 ja 1f 5552712Snn35248 5562712Snn35248 ALTENTRY(sys_sysenter) 5572712Snn35248 nop 5582712Snn352481: 5590Sstevel@tonic-gate / 5600Sstevel@tonic-gate / do what the call gate would've done to the stack .. 5610Sstevel@tonic-gate / 5620Sstevel@tonic-gate pushl $UDS_SEL / (really %ss, but it's the same ..) 5630Sstevel@tonic-gate pushl %ecx / userland makes this a copy of %esp 5640Sstevel@tonic-gate pushfl 5650Sstevel@tonic-gate orl $PS_IE, (%esp) / turn interrupts on when we return to user 5660Sstevel@tonic-gate pushl $UCS_SEL 5670Sstevel@tonic-gate pushl %edx / userland makes this a copy of %eip 5680Sstevel@tonic-gate / 5690Sstevel@tonic-gate / done. finish building the stack frame 5700Sstevel@tonic-gate / 5710Sstevel@tonic-gate subl $8, %esp / leave space for ERR and TRAPNO 5720Sstevel@tonic-gate 5730Sstevel@tonic-gate SYSENTER_PUSH 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate#ifdef TRAPTRACE 5760Sstevel@tonic-gate TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER) / uses labels 8 and 9 5770Sstevel@tonic-gate TRACE_REGS(%edi, %esp, %ebx, %ecx) / uses label 9 5780Sstevel@tonic-gate pushl %eax 5790Sstevel@tonic-gate TRACE_STAMP(%edi) / clobbers %eax, %edx, uses label 9 5800Sstevel@tonic-gate popl %eax 5810Sstevel@tonic-gate movl %eax, TTR_SYSNUM(%edi) 5820Sstevel@tonic-gate#endif 5830Sstevel@tonic-gate movl %esp, %ebp 5840Sstevel@tonic-gate 5850Sstevel@tonic-gate CPU_STATS_SYS_SYSCALL_INC 5860Sstevel@tonic-gate 5870Sstevel@tonic-gate ENABLE_INTR_FLAGS 5880Sstevel@tonic-gate 5890Sstevel@tonic-gate pushl %eax / preserve across mstate call 5900Sstevel@tonic-gate MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM) 5910Sstevel@tonic-gate popl %eax 5920Sstevel@tonic-gate 5930Sstevel@tonic-gate movl %gs:CPU_THREAD, %ebx 5940Sstevel@tonic-gate 5950Sstevel@tonic-gate ASSERT_LWPTOREGS(%ebx, %esp) 5960Sstevel@tonic-gate 5970Sstevel@tonic-gate CHECK_PRESYS_NE(%ebx, %eax) 5980Sstevel@tonic-gate jne _full_syscall_presys 5990Sstevel@tonic-gate SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault) 6000Sstevel@tonic-gate 6010Sstevel@tonic-gate_sysenter_call: 6020Sstevel@tonic-gate call *SY_CALLC(%eax) 6030Sstevel@tonic-gate 6040Sstevel@tonic-gate_sysenter_done: 6050Sstevel@tonic-gate CHECK_POSTSYS_NE(%ebx, %ecx) 6060Sstevel@tonic-gate jne _full_syscall_postsys 6070Sstevel@tonic-gate SIMPLE_SYSCALL_POSTSYS(%ebx, %cx) 6080Sstevel@tonic-gate / 6090Sstevel@tonic-gate / sysexit uses %edx to restore %eip, so we can't use it 6100Sstevel@tonic-gate / to return a value, sigh. 6110Sstevel@tonic-gate / 6120Sstevel@tonic-gate movl %eax, REGOFF_EAX(%esp) 6130Sstevel@tonic-gate / movl %edx, REGOFF_EDX(%esp) 6140Sstevel@tonic-gate 6150Sstevel@tonic-gate / Interrupts will be turned on by the 'sti' executed just before 6160Sstevel@tonic-gate / sysexit. The following ensures that restoring the user's EFLAGS 6170Sstevel@tonic-gate / doesn't enable interrupts too soon. 6180Sstevel@tonic-gate andl $_BITNOT(PS_IE), REGOFF_EFL(%esp) 6190Sstevel@tonic-gate 6200Sstevel@tonic-gate MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER) 6210Sstevel@tonic-gate 6220Sstevel@tonic-gate cli 6230Sstevel@tonic-gate 6240Sstevel@tonic-gate SYSCALL_POP 6250Sstevel@tonic-gate 6260Sstevel@tonic-gate popl %edx / sysexit: %edx -> %eip 6270Sstevel@tonic-gate addl $4, %esp / get CS off the stack 6280Sstevel@tonic-gate popfl / EFL 6290Sstevel@tonic-gate popl %ecx / sysexit: %ecx -> %esp 6300Sstevel@tonic-gate sti 6310Sstevel@tonic-gate sysexit 6320Sstevel@tonic-gate SET_SIZE(sys_sysenter) 6332712Snn35248 SET_SIZE(brand_sys_sysenter) 6342712Snn35248 6350Sstevel@tonic-gate/* 6360Sstevel@tonic-gate * Declare a uintptr_t which covers the entire pc range of syscall 6370Sstevel@tonic-gate * handlers for the stack walkers that need this. 6380Sstevel@tonic-gate */ 6390Sstevel@tonic-gate .align CPTRSIZE 6400Sstevel@tonic-gate .globl _allsyscalls_size 6410Sstevel@tonic-gate .type _allsyscalls_size, @object 6420Sstevel@tonic-gate_allsyscalls_size: 6430Sstevel@tonic-gate .NWORD . - _allsyscalls 6440Sstevel@tonic-gate SET_SIZE(_allsyscalls_size) 6450Sstevel@tonic-gate 6460Sstevel@tonic-gate#endif /* __lint */ 6470Sstevel@tonic-gate 6480Sstevel@tonic-gate/* 6490Sstevel@tonic-gate * These are the thread context handlers for lwps using sysenter/sysexit. 6500Sstevel@tonic-gate */ 6510Sstevel@tonic-gate 6520Sstevel@tonic-gate#if defined(__lint) 6530Sstevel@tonic-gate 6540Sstevel@tonic-gate/*ARGSUSED*/ 6550Sstevel@tonic-gatevoid 6560Sstevel@tonic-gatesep_save(void *ksp) 6570Sstevel@tonic-gate{} 6580Sstevel@tonic-gate 6590Sstevel@tonic-gate/*ARGSUSED*/ 6600Sstevel@tonic-gatevoid 6610Sstevel@tonic-gatesep_restore(void *ksp) 6620Sstevel@tonic-gate{} 6630Sstevel@tonic-gate 6640Sstevel@tonic-gate#else /* __lint */ 6650Sstevel@tonic-gate 6660Sstevel@tonic-gate /* 6670Sstevel@tonic-gate * setting this value to zero as we switch away causes the 6680Sstevel@tonic-gate * stack-pointer-on-sysenter to be NULL, ensuring that we 6690Sstevel@tonic-gate * don't silently corrupt another (preempted) thread stack 6700Sstevel@tonic-gate * when running an lwp that (somehow) didn't get sep_restore'd 6710Sstevel@tonic-gate */ 6720Sstevel@tonic-gate ENTRY_NP(sep_save) 6730Sstevel@tonic-gate xorl %edx, %edx 6740Sstevel@tonic-gate xorl %eax, %eax 6750Sstevel@tonic-gate movl $MSR_INTC_SEP_ESP, %ecx 6760Sstevel@tonic-gate wrmsr 6770Sstevel@tonic-gate ret 6780Sstevel@tonic-gate SET_SIZE(sep_save) 6790Sstevel@tonic-gate 6800Sstevel@tonic-gate /* 6810Sstevel@tonic-gate * Update the kernel stack pointer as we resume onto this cpu. 6820Sstevel@tonic-gate */ 6830Sstevel@tonic-gate ENTRY_NP(sep_restore) 6840Sstevel@tonic-gate movl 4(%esp), %eax /* per-lwp kernel sp */ 6850Sstevel@tonic-gate xorl %edx, %edx 6860Sstevel@tonic-gate movl $MSR_INTC_SEP_ESP, %ecx 6870Sstevel@tonic-gate wrmsr 6880Sstevel@tonic-gate ret 6890Sstevel@tonic-gate SET_SIZE(sep_restore) 6900Sstevel@tonic-gate 6910Sstevel@tonic-gate#endif /* __lint */ 6920Sstevel@tonic-gate 6930Sstevel@tonic-gate/* 6940Sstevel@tonic-gate * Call syscall(). Called from trap() on watchpoint at lcall 0,7 6950Sstevel@tonic-gate */ 6960Sstevel@tonic-gate 6970Sstevel@tonic-gate#if defined(__lint) 6980Sstevel@tonic-gate 6990Sstevel@tonic-gatevoid 7000Sstevel@tonic-gatewatch_syscall(void) 7010Sstevel@tonic-gate{} 7020Sstevel@tonic-gate 7030Sstevel@tonic-gate#else /* __lint */ 7040Sstevel@tonic-gate 7050Sstevel@tonic-gate ENTRY_NP(watch_syscall) 7063446Smrj CLI(%eax) 7070Sstevel@tonic-gate movl %gs:CPU_THREAD, %ebx 7080Sstevel@tonic-gate movl T_STACK(%ebx), %esp / switch to the thread stack 7090Sstevel@tonic-gate movl REGOFF_EAX(%esp), %eax / recover original syscall# 7100Sstevel@tonic-gate jmp _watch_do_syscall 7110Sstevel@tonic-gate SET_SIZE(watch_syscall) 7120Sstevel@tonic-gate 7130Sstevel@tonic-gate#endif /* __lint */ 714