xref: /onnv-gate/usr/src/uts/i86pc/ml/syscall_asm.s (revision 12613:4c5722bc28dc)
10Sstevel@tonic-gate/*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
52712Snn35248 * Common Development and Distribution License (the "License").
62712Snn35248 * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate/*
22*12613SSurya.Prakki@Sun.COM * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
250Sstevel@tonic-gate/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
260Sstevel@tonic-gate/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
270Sstevel@tonic-gate/*	  All Rights Reserved					*/
280Sstevel@tonic-gate
290Sstevel@tonic-gate/*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
300Sstevel@tonic-gate/*	  All Rights Reserved					*/
310Sstevel@tonic-gate
320Sstevel@tonic-gate#include <sys/asm_linkage.h>
330Sstevel@tonic-gate#include <sys/asm_misc.h>
340Sstevel@tonic-gate#include <sys/regset.h>
350Sstevel@tonic-gate#include <sys/psw.h>
360Sstevel@tonic-gate#include <sys/x86_archext.h>
372712Snn35248#include <sys/machbrand.h>
383446Smrj#include <sys/privregs.h>
390Sstevel@tonic-gate
400Sstevel@tonic-gate#if defined(__lint)
410Sstevel@tonic-gate
420Sstevel@tonic-gate#include <sys/types.h>
430Sstevel@tonic-gate#include <sys/thread.h>
440Sstevel@tonic-gate#include <sys/systm.h>
450Sstevel@tonic-gate
460Sstevel@tonic-gate#else	/* __lint */
470Sstevel@tonic-gate
480Sstevel@tonic-gate#include <sys/segments.h>
490Sstevel@tonic-gate#include <sys/pcb.h>
500Sstevel@tonic-gate#include <sys/trap.h>
510Sstevel@tonic-gate#include <sys/ftrace.h>
520Sstevel@tonic-gate#include <sys/traptrace.h>
530Sstevel@tonic-gate#include <sys/clock.h>
540Sstevel@tonic-gate#include <sys/panic.h>
550Sstevel@tonic-gate#include "assym.h"
560Sstevel@tonic-gate
570Sstevel@tonic-gate#endif	/* __lint */
580Sstevel@tonic-gate
/*
 * We implement two flavours of system call entry points
 *
 * -	{int,lcall}/iret	(i386)
 * -	sysenter/sysexit	(Pentium II and beyond)
 *
 * The basic pattern used in the handlers is to check to see if we can
 * do fast (simple) version of the system call; if we can't we use various
 * C routines that handle corner cases and debugging.
 *
 * To reduce the amount of assembler replication, yet keep the system call
 * implementations vaguely comprehensible, the common code in the body
 * of the handlers is broken up into a set of preprocessor definitions
 * below.
 */

/*
 * When we have SYSCALLTRACE defined, we sneak an extra
 * predicate into a couple of tests: the global 'syscalltrace' flag is
 * ORed into the given 32-bit register so the subsequent conditional
 * branch also takes the slow (full) path whenever tracing is enabled.
 * On non-SYSCALLTRACE builds this expands to nothing.
 */
#if defined(SYSCALLTRACE)
#define	ORL_SYSCALLTRACE(r32)	\
	orl	syscalltrace, r32
#else
#define	ORL_SYSCALLTRACE(r32)
#endif

/*
 * This check is false whenever we want to go fast i.e.
 *
 *	if (code >= NSYSCALL ||
 *	    t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
 *		do full version
 * #ifdef SYSCALLTRACE
 *	if (syscalltrace)
 *		do full version
 * #endif
 *
 * Implementation: %edi accumulates the "must do full pre-sys" predicate
 * (t_pre_sys byte, the TP_WATCHPT bit, and a setae-generated bit for
 * code >= NSYSCALL); the final orl leaves ZF clear iff any predicate
 * bit is set.
 *
 * Preconditions:
 * -	t	curthread
 * -	code	contains the syscall number
 * Postconditions:
 * -	%ecx and %edi are smashed
 * -	condition code flag ZF is cleared if pre-sys is too complex
 */
#define	CHECK_PRESYS_NE(t, code)		\
	movzbl	T_PRE_SYS(t), %edi;		\
	movzwl	T_PROC_FLAG(t), %ecx;		\
	andl	$TP_WATCHPT, %ecx;		\
	orl	%ecx, %edi;			\
	cmpl	$NSYSCALL, code;		\
	setae	%cl;				\
	movzbl	%cl, %ecx;			\
	orl	%ecx, %edi;			\
	ORL_SYSCALLTRACE(%edi)

/*
 * Check if a brand_mach_ops callback is defined for the specified callback_id
 * type.  If so invoke it with the user's %gs value loaded and the following
 * data on the stack:
 *	   --------------------------------------
 *         | user's %ss                         |
 *    |    | user's %esp                        |
 *    |    | EFLAGS register                    |
 *    |    | user's %cs                         |
 *    |    | user's %eip (user return address)  |
 *    |    | 'scratch space'			|
 *    |    | user's %ebx			|
 *    |    | user's %gs selector		|
 *    v    | lwp pointer			|
 *         | callback wrapper return addr 	|
 *         --------------------------------------
 *
 * If the brand code returns, we assume that we are meant to execute the
 * normal system call path.
 *
 * The interface to the brand callbacks on the 32-bit kernel assumes %ebx
 * is available as a scratch register within the callback.  If the callback
 * returns within the kernel then this macro will restore %ebx.  If the
 * callback is going to return directly to userland then it should restore
 * %ebx before returning to userland.
 *
 * Stack offsets used below (after the subl and three pushes):
 *	0(%esp) = lwp pointer, 4(%esp) = user %gs, 8(%esp) = user %ebx,
 *	12(%esp) = scratch slot (holds the callback address).
 */
#define	BRAND_CALLBACK(callback_id)					    \
	subl	$4, %esp		/* save some scratch space	*/ ;\
	pushl	%ebx			/* save %ebx to use for scratch	*/ ;\
	pushl	%gs			/* save the user %gs		*/ ;\
	movl	$KGS_SEL, %ebx						   ;\
	movw	%bx, %gs		/* switch to the kernel's %gs	*/ ;\
	movl	%gs:CPU_THREAD, %ebx	/* load the thread pointer	*/ ;\
	movl	T_LWP(%ebx), %ebx	/* load the lwp pointer		*/ ;\
	pushl	%ebx			/* push the lwp pointer		*/ ;\
	movl	LWP_PROCP(%ebx), %ebx	/* load the proc pointer	*/ ;\
	movl	P_BRAND(%ebx), %ebx	/* load the brand pointer	*/ ;\
	movl	B_MACHOPS(%ebx), %ebx	/* load the machops pointer	*/ ;\
	movl	_CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx		   ;\
	cmpl	$0, %ebx						   ;\
	je	1f							   ;\
	movl	%ebx, 12(%esp)		/* save callback to scratch	*/ ;\
	movl	4(%esp), %ebx		/* grab the user %gs		*/ ;\
	movw	%bx, %gs		/* restore the user %gs		*/ ;\
	call	*12(%esp)		/* call callback in scratch	*/ ;\
1:	movl	4(%esp), %ebx		/* restore user %gs (re-do if	*/ ;\
	movw	%bx, %gs		/* branch due to no callback)	*/ ;\
	movl	8(%esp), %ebx		/* restore user's %ebx		*/ ;\
	addl	$16, %esp		/* restore stack ptr		*/

/*
 * Record an lwp microstate transition: calls syscall_mstate(from, to)
 * with cdecl argument passing and pops the two arguments afterwards.
 * Caller-saved registers may be clobbered by the C call — callers here
 * push %eax around this macro when they need it preserved.
 */
#define	MSTATE_TRANSITION(from, to)		\
	pushl	$to;				\
	pushl	$from;				\
	call	syscall_mstate;			\
	addl	$0x8, %esp

/*
 * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
 *
 * Increments the per-CPU 64-bit syscall counter (reached via the kernel
 * %gs segment) with an add/adc carry-propagating pair.
 * This must be called with interrupts or preemption disabled.
 */
#define	CPU_STATS_SYS_SYSCALL_INC			\
	addl	$1, %gs:CPU_STATS_SYS_SYSCALL;		\
	adcl	$0, %gs:CPU_STATS_SYS_SYSCALL+4;

#if !defined(__lint)

/*
 * ASSERT(lwptoregs(lwp) == rp);
 *
 * this may seem obvious, but very odd things happen if this
 * assertion is false
 *
 * On a mismatch (DEBUG kernels only) this calls
 * panic(fmt, __LINE__, lwp, lwp->lwp_regs, rp) with the format string
 * below; local numeric label "7" is the success fall-through target.
 *
 * Preconditions:
 *	-none-
 * Postconditions (if assertion is true):
 *	%esi and %edi are smashed
 */
#if defined(DEBUG)

__lwptoregs_msg:
	.string	"syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]"

#define	ASSERT_LWPTOREGS(t, rp)				\
	movl	T_LWP(t), %esi;				\
	movl	LWP_REGS(%esi), %edi;			\
	cmpl	rp, %edi;				\
	je	7f;					\
	pushl	rp;					\
	pushl	%edi;					\
	pushl	%esi;					\
	pushl	$__LINE__;				\
	pushl	$__lwptoregs_msg;			\
	call	panic;					\
7:
#else
#define	ASSERT_LWPTOREGS(t, rp)
#endif

#endif	/* __lint */

/*
 * This is an assembler version of this fragment:
 *
 * lwp->lwp_state = LWP_SYS;
 * lwp->lwp_ru.sysc++;
 * lwp->lwp_eosys = NORMALRETURN;
 * lwp->lwp_ap = argp;
 *
 * (lwp_ru.sysc is a 64-bit counter, hence the add/adc pair.)
 *
 * Preconditions:
 *	-none-
 * Postconditions:
 *	-none-
 */
#define	SET_LWP(lwp, argp)				\
	movb	$LWP_SYS, LWP_STATE(lwp);		\
	addl	$1, LWP_RU_SYSC(lwp);			\
	adcl	$0, LWP_RU_SYSC+4(lwp);			\
	movb	$NORMALRETURN, LWP_EOSYS(lwp);		\
	movl	argp, LWP_AP(lwp)

/*
 * Set up the thread, lwp, find the handler, and copy
 * in the arguments from userland to the kernel stack.
 *
 * The argument copy reads from the saved user %esp (+4, presumably to
 * skip the user's return address — TODO confirm against the lcall/int
 * entry description above) under T_LOFAULT protection so a bad user
 * pointer lands at 'faultlabel'.  After the "rep smovl" completes,
 * %ecx == 0, which is what conveniently clears T_LOFAULT again.
 *
 * Preconditions:
 * -	%eax contains the syscall number
 * Postconditions:
 * -	%eax contains a pointer to the sysent structure
 * -	%ecx is zeroed
 * -	%esi, %edi are smashed
 * -	%esp is SYS_DROPped ready for the syscall
 */
#define	SIMPLE_SYSCALL_PRESYS(t, faultlabel)		\
	movl	T_LWP(t), %esi;				\
	movw	%ax, T_SYSNUM(t);			\
	subl	$SYS_DROP, %esp;			\
	shll	$SYSENT_SIZE_SHIFT, %eax;			\
	SET_LWP(%esi, %esp);				\
	leal	sysent(%eax), %eax;			\
	movzbl	SY_NARG(%eax), %ecx;			\
	testl	%ecx, %ecx;				\
	jz	4f;					\
	movl	%esp, %edi;				\
	movl	SYS_DROP + REGOFF_UESP(%esp), %esi;	\
	movl	$faultlabel, T_LOFAULT(t);		\
	addl	$4, %esi;				\
	rep;						\
	  smovl;					\
	movl	%ecx, T_LOFAULT(t);			\
4:

/*
 * Check to see if a simple return is possible i.e.
 *
 *	if ((t->t_post_sys_ast | syscalltrace) != 0)
 *		do full version;
 *
 * Preconditions:
 * -	t is curthread
 * Postconditions:
 * -	condition code NE is set if post-sys is too complex
 * -	rtmp is zeroed if it isn't (we rely on this!)
 */
#define	CHECK_POSTSYS_NE(t, rtmp)			\
	xorl	rtmp, rtmp;				\
	ORL_SYSCALLTRACE(rtmp);				\
	orl	T_POST_SYS_AST(t), rtmp;		\
	cmpl	$0, rtmp

/*
 * Fix up the lwp, thread, and eflags for a successful return
 *
 * The final andb clears PS_C (carry) in the low byte of the saved
 * EFLAGS in the regs frame before we head back to userland.
 *
 * Preconditions:
 * -	zwreg contains zero
 * Postconditions:
 * -	%esp has been unSYS_DROPped
 * -	%esi is smashed (points to lwp)
 */
#define	SIMPLE_SYSCALL_POSTSYS(t, zwreg)		\
	movl	T_LWP(t), %esi;				\
	addl	$SYS_DROP, %esp;			\
	movw	zwreg, T_SYSNUM(t);			\
	movb	$LWP_USER, LWP_STATE(%esi);		\
	andb	$_CONST(0xffff - PS_C), REGOFF_EFL(%esp)

/*
 * System call handler.  This is the destination of both the call
 * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes,
 * there are two significant differences between an interrupt gate and a call
 * gate:
 *
 * 1) An interrupt gate runs the handler with interrupts disabled, whereas a
 * call gate runs the handler with whatever EFLAGS settings were in effect at
 * the time of the call.
 *
 * 2) An interrupt gate pushes the contents of the EFLAGS register at the time
 * of the interrupt onto the stack, whereas a call gate does not.
 *
 * Because we use the following code sequence to handle system calls made from
 * _both_ a call gate _and_ an interrupt gate, these two differences must be
 * respected. In regards to number 1) above, the handler must ensure that a sane
 * EFLAGS snapshot is stored on the stack so that when the kernel returns back
 * to the user via iret (which returns to user with the EFLAGS value saved on
 * the stack), interrupts are re-enabled.
 *
 * In regards to number 2) above, the handler must always put a current snapshot
 * of EFLAGS onto the stack in the appropriate place. If we came in via an
 * interrupt gate, we will be clobbering the EFLAGS value that was pushed by
 * the interrupt gate. This is OK, as the only bit that was changed by the
 * hardware was the IE (interrupt enable) bit, which for an interrupt gate is
 * now off. If we were to do nothing, the stack would contain an EFLAGS with
 * IE off, resulting in us eventually returning back to the user with interrupts
 * disabled. The solution is to turn on the IE bit in the EFLAGS value saved on
 * the stack.
 *
 * Another subtlety which deserves mention is the difference between the two
 * descriptors. The call gate descriptor is set to instruct the hardware to copy
 * one parameter from the user stack to the kernel stack, whereas the interrupt
 * gate descriptor doesn't use the parameter passing mechanism at all. The
 * kernel doesn't actually use the parameter that is copied by the hardware; the
 * only reason it does this is so that there is a space on the stack large
 * enough to hold an EFLAGS register value, which happens to be in the correct
 * place for use by iret when we go back to userland. How convenient.
 *
 * Stack frame description in syscall() and callees.
 *
 * |------------|
 * | regs	| +(8*4)+4	registers
 * |------------|
 * | 8 args	| <- %esp	MAXSYSARGS (currently 8) arguments
 * |------------|
 *
 * SYS_DROP is the size in bytes of that argument area.
 */
#define	SYS_DROP	_CONST(_MUL(MAXSYSARGS, 4))

#if defined(__lint)

/*
 * Lint-visible C stubs for the assembler entry points defined below.
 */
/*ARGSUSED*/
void
sys_call()
{}

void
_allsyscalls()
{}

size_t _allsyscalls_size;

#else	/* __lint */
	/*
	 * brand_sys_call: branded-zone entry point; gives the brand's
	 * syscall callback first crack, then falls into sys_call.
	 * _allsyscalls marks the start of the pc range covered by
	 * _allsyscalls_size (see below).
	 */
	ENTRY_NP2(brand_sys_call, _allsyscalls)
	BRAND_CALLBACK(BRAND_CB_SYSCALL)

	ALTENTRY(sys_call)
	/ on entry	eax = system call number

	/ set up the stack to look as in reg.h
	subl    $8, %esp        / pad the stack with ERRCODE and TRAPNO

	SYSCALL_PUSH

#ifdef TRAPTRACE
	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9"
	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/ Uses label "9"
	pushl	%eax
	TRACE_STAMP(%edi)		/ Clobbers %eax, %edx, uses "9"
	popl	%eax
	movl	%eax, TTR_SYSNUM(%edi)
#endif

_watch_do_syscall:
	movl	%esp, %ebp

	/ Interrupts may be enabled here, so we must make sure this thread
	/ doesn't migrate off the CPU while it updates the CPU stats.
	/
	/ XXX This is only true if we got here via call gate thru the LDT for
	/ old style syscalls. Perhaps this preempt++-- will go away soon?
	movl	%gs:CPU_THREAD, %ebx
	addb	$1, T_PREEMPT(%ebx)
	CPU_STATS_SYS_SYSCALL_INC
	subb	$1, T_PREEMPT(%ebx)

	ENABLE_INTR_FLAGS

	pushl	%eax				/ preserve across mstate call
	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
	popl	%eax

	movl	%gs:CPU_THREAD, %ebx

	ASSERT_LWPTOREGS(%ebx, %esp)

	CHECK_PRESYS_NE(%ebx, %eax)
	jne	_full_syscall_presys		/ pre-sys too complex; slow path
	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)

_syslcall_call:
	call	*SY_CALLC(%eax)			/ %eax = sysent ptr; rvals in %eax:%edx

_syslcall_done:
	CHECK_POSTSYS_NE(%ebx, %ecx)
	jne	_full_syscall_postsys		/ AST/trace pending; slow path
	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
	movl	%eax, REGOFF_EAX(%esp)
	movl	%edx, REGOFF_EDX(%esp)

	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

	/
	/ get back via iret
	/
	CLI(%edx)
	jmp	sys_rtt_syscall

	/ Full pre-syscall work: hand off to the C routine
	/ syscall_entry(t, argp).
_full_syscall_presys:
	movl	T_LWP(%ebx), %esi
	subl	$SYS_DROP, %esp
	movb	$LWP_SYS, LWP_STATE(%esi)
	pushl	%esp
	pushl	%ebx
	call	syscall_entry
	addl	$8, %esp
	jmp	_syslcall_call

	/ Full post-syscall work: syscall_exit(t, rval1, rval2),
	/ then the common return path.
_full_syscall_postsys:
	addl	$SYS_DROP, %esp
	pushl	%edx
	pushl	%eax
	pushl	%ebx
	call	syscall_exit
	addl	$12, %esp
	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
	jmp	_sys_rtt

	/ T_LOFAULT target for a bad user argument pointer.
_syscall_fault:
	push	$0xe			/ EFAULT
	call	set_errno
	addl	$4, %esp
	xorl	%eax, %eax		/ fake syscall_err()
	xorl	%edx, %edx
	jmp	_syslcall_done
	SET_SIZE(sys_call)
	SET_SIZE(brand_sys_call)

#endif	/* __lint */

/*
 * System call handler via the sysenter instruction
 *
 * Here's how syscall entry usually works (see sys_call for details).
 *
 * There, the caller (lcall or int) in userland has arranged that:
 *
 * -	%eax contains the syscall number
 * -	the user stack contains the args to the syscall
 *
 * Normally the lcall instruction into the call gate causes the processor
 * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
 * The sys_call handler then leaves space for r_trapno and r_err, and
 * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
 * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
 * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
 * extracts %efl and puts it into r_efl (which happens to live at the offset
 * that <top-of-stack> was copied into). Note that the value in r_efl has
 * the IF (interrupt enable) flag turned on. (The int instruction into the
 * interrupt gate does essentially the same thing, only instead of
 * <top-of-stack> we get eflags - see comment above.)
 *
 * In the sysenter case, things are a lot more primitive.
 *
 * The caller in userland has arranged that:
 *
 * -	%eax contains the syscall number
 * -	%ecx contains the user %esp
 * -	%edx contains the return %eip
 * -	the user stack contains the args to the syscall
 *
 * e.g.
 *	<args on the stack>
 *	mov	$SYS_callnum, %eax
 *	mov	$1f, %edx	/ return %eip
 *	mov	%esp, %ecx	/ return %esp
 *	sysenter
 * 1:
 *
 * Hardware and (privileged) initialization code have arranged that by
 * the time the sysenter instructions completes:
 *
 * - %eip is pointing to sys_sysenter (below).
 * - %cs and %ss are set to kernel text and stack (data) selectors.
 * - %esp is pointing at the lwp's stack
 * - Interrupts have been disabled.
 *
 * The task for the sysenter handler is:
 *
 * -	recreate the same regs structure on the stack and the same
 *	kernel state as if we'd come in on an lcall
 * -	do the normal work of a syscall
 * -	execute the system call epilogue, use sysexit to return to userland.
 *
 * Note that we are unable to return both "rvals" to userland with this
 * call, as %edx is used by the sysexit instruction.
 *
 * One final complication in this routine is its interaction with
 * single-stepping in a debugger.  For most of the system call mechanisms,
 * the CPU automatically clears the single-step flag before we enter the
 * kernel.  The sysenter mechanism does not clear the flag, so a user
 * single-stepping through a libc routine may suddenly find him/herself
 * single-stepping through the kernel.  To detect this, kmdb compares the
 * trap %pc to the [brand_]sys_enter addresses on each single-step trap.
 * If it finds that we have single-stepped to a sysenter entry point, it
 * explicitly clears the flag and executes the sys_sysenter routine.
 *
 * One final complication in this final complication is the fact that we
 * have two different entry points for sysenter: brand_sys_sysenter and
 * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
 * through the kernel with kmdb, we will eventually hit the instruction at
 * sys_sysenter.  kmdb cannot distinguish between that valid single-step
 * and the undesirable one mentioned above.  To avoid this situation, we
 * simply add a jump over the instruction at sys_sysenter to make it
 * impossible to single-step to it.
 */
#if defined(__lint)

/*
 * Lint-visible C stub for the assembler entry point below.
 */
void
sys_sysenter()
{}

#else	/* __lint */

	ENTRY_NP(brand_sys_sysenter)
	pushl	%edx			/ preserve user return %eip across callback
	BRAND_CALLBACK(BRAND_CB_SYSENTER)
	popl	%edx
	/*
	 * Jump over sys_sysenter to allow single-stepping as described
	 * above.  (NOTE(review): 'ja' looks always-taken here — the
	 * trailing addl in BRAND_CALLBACK leaves CF=0/ZF=0 — presumably
	 * a conditional branch is used instead of jmp deliberately;
	 * confirm before changing.)
	 */
	ja	1f

	ALTENTRY(sys_sysenter)
	nop
1:
	/
	/ do what the call gate would've done to the stack ..
	/
	pushl	$UDS_SEL	/ (really %ss, but it's the same ..)
	pushl	%ecx		/ userland makes this a copy of %esp
	pushfl
	orl	$PS_IE, (%esp)	/ turn interrupts on when we return to user
	pushl	$UCS_SEL
	pushl	%edx		/ userland makes this a copy of %eip
	/
	/ done.  finish building the stack frame
	/
	subl	$8, %esp	/ leave space for ERR and TRAPNO

	SYSENTER_PUSH

#ifdef TRAPTRACE
	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER)	/ uses labels 8 and 9
	TRACE_REGS(%edi, %esp, %ebx, %ecx)		/ uses label 9
	pushl	%eax
	TRACE_STAMP(%edi)		/ clobbers %eax, %edx, uses label 9
	popl	%eax
	movl	%eax, TTR_SYSNUM(%edi)
#endif
	movl	%esp, %ebp

	/ interrupts are still off here, so the stats update is safe
	CPU_STATS_SYS_SYSCALL_INC

	ENABLE_INTR_FLAGS

	pushl	%eax				/ preserve across mstate call
	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
	popl	%eax

	movl	%gs:CPU_THREAD, %ebx

	ASSERT_LWPTOREGS(%ebx, %esp)

	CHECK_PRESYS_NE(%ebx, %eax)
	jne	_full_syscall_presys		/ shared slow path in sys_call
	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)

_sysenter_call:
	call	*SY_CALLC(%eax)

_sysenter_done:
	CHECK_POSTSYS_NE(%ebx, %ecx)
	jne	_full_syscall_postsys		/ shared slow path in sys_call
	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
	/
	/ sysexit uses %edx to restore %eip, so we can't use it
	/ to return a value, sigh.
	/
	movl	%eax, REGOFF_EAX(%esp)
	/ movl	%edx, REGOFF_EDX(%esp)

	/ Interrupts will be turned on by the 'sti' executed just before
	/ sysexit. The following ensures that restoring the user's EFLAGS
	/ doesn't enable interrupts too soon.
	andl	$_BITNOT(PS_IE), REGOFF_EFL(%esp)

	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)

	cli

	SYSCALL_POP

	popl	%edx			/ sysexit: %edx -> %eip
	addl	$4, %esp		/ get CS off the stack
	popfl				/ EFL
	popl	%ecx			/ sysexit: %ecx -> %esp
	sti
	sysexit
	SET_SIZE(sys_sysenter)
	SET_SIZE(brand_sys_sysenter)

/*
 * Declare a uintptr_t which covers the entire pc range of syscall
 * handlers for the stack walkers that need this.
 */
	.align	CPTRSIZE
	.globl	_allsyscalls_size
	.type	_allsyscalls_size, @object
_allsyscalls_size:
	.NWORD	. - _allsyscalls
	SET_SIZE(_allsyscalls_size)

#endif	/* __lint */

/*
 * These are the thread context handlers for lwps using sysenter/sysexit.
 */

#if defined(__lint)

/*ARGSUSED*/
void
sep_save(void *ksp)
{}

/*ARGSUSED*/
void
sep_restore(void *ksp)
{}

#else	/* __lint */

	/*
	 * setting this value to zero as we switch away causes the
	 * stack-pointer-on-sysenter to be NULL, ensuring that we
	 * don't silently corrupt another (preempted) thread stack
	 * when running an lwp that (somehow) didn't get sep_restore'd
	 */
	ENTRY_NP(sep_save)
	xorl	%edx, %edx
	xorl	%eax, %eax
	movl	$MSR_INTC_SEP_ESP, %ecx
	wrmsr				/* MSR[%ecx] = %edx:%eax = 0 */
	ret
	SET_SIZE(sep_save)

	/*
	 * Update the kernel stack pointer as we resume onto this cpu.
	 */
	ENTRY_NP(sep_restore)
	movl	4(%esp), %eax			/* per-lwp kernel sp */
	xorl	%edx, %edx
	movl	$MSR_INTC_SEP_ESP, %ecx
	wrmsr				/* MSR[%ecx] = %edx:%eax = ksp */
	ret
	SET_SIZE(sep_restore)

#endif	/* __lint */

/*
 * Call syscall().  Called from trap() on watchpoint at lcall 0,7
 *
 * Disables interrupts, switches to the thread's regular stack (where
 * the regs frame already lives), recovers the syscall number from the
 * saved %eax, and rejoins the common path at _watch_do_syscall.
 */

#if defined(__lint)

void
watch_syscall(void)
{}

#else	/* __lint */

	ENTRY_NP(watch_syscall)
	CLI(%eax)
	movl	%gs:CPU_THREAD, %ebx
	movl	T_STACK(%ebx), %esp		/ switch to the thread stack
	movl	REGOFF_EAX(%esp), %eax		/ recover original syscall#
	jmp	_watch_do_syscall
	SET_SIZE(watch_syscall)

#endif	/* __lint */