Home | History | Annotate | Download | only in ml
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
     28 /*	  All Rights Reserved					*/
     29 
     30 /*	Copyright (c) 1987, 1988 Microsoft Corporation		*/
     31 /*	  All Rights Reserved					*/
     32 
     33 #include <sys/asm_linkage.h>
     34 #include <sys/asm_misc.h>
     35 #include <sys/regset.h>
     36 #include <sys/psw.h>
     37 #include <sys/x86_archext.h>
     38 #include <sys/machbrand.h>
     39 #include <sys/privregs.h>
     40 
     41 #if defined(__lint)
     42 
     43 #include <sys/types.h>
     44 #include <sys/thread.h>
     45 #include <sys/systm.h>
     46 
     47 #else	/* __lint */
     48 
     49 #include <sys/segments.h>
     50 #include <sys/pcb.h>
     51 #include <sys/trap.h>
     52 #include <sys/ftrace.h>
     53 #include <sys/traptrace.h>
     54 #include <sys/clock.h>
     55 #include <sys/panic.h>
     56 #include "assym.h"
     57 
     58 #endif	/* __lint */
     59 
     60 /*
     61  * We implement two flavours of system call entry points
     62  *
     63  * -	{int,lcall}/iret	(i386)
     64  * -	sysenter/sysexit	(Pentium II and beyond)
     65  *
     66  * The basic pattern used in the handlers is to check to see if we can
     67  * do a fast (simple) version of the system call; if we can't, we use various
     68  * C routines that handle corner cases and debugging.
     69  *
     70  * To reduce the amount of assembler replication, yet keep the system call
     71  * implementations vaguely comprehensible, the common code in the body
     72  * of the handlers is broken up into a set of preprocessor definitions
     73  * below.
     74  */
     75 
     76 /*
     77  * ORL_SYSCALLTRACE(reg): on kernels built with SYSCALLTRACE, OR the
     78  * 'syscalltrace' word into the 32-bit register 'reg' so that the fast-path
     79  * predicates also fail whenever tracing is switched on.  On all other
        * kernels the macro expands to nothing.
        */
     80 #ifdef SYSCALLTRACE
     81 #define	ORL_SYSCALLTRACE(reg)		\
     82 	orl	syscalltrace, reg
     83 #else	/* !SYSCALLTRACE */
     84 #define	ORL_SYSCALLTRACE(reg)
     85 #endif	/* SYSCALLTRACE */
     86 
     87 /*
     88  * This check is false whenever we want to go fast i.e.
     89  *
     90  *	if (code >= NSYSCALL ||
     91  *	    t->t_pre_sys || (t->t_proc_flag & TP_WATCHPT) != 0)
     92  *		do full version
     93  * #ifdef SYSCALLTRACE
     94  *	if (syscalltrace)
     95  *		do full version
     96  * #endif
     97  *
        * The individual predicates are OR-ed together in %edi.  The last OR
        * leaves ZF set only if every predicate was zero, so callers follow this
        * macro with "jne <full version>".
        *
     98  * Preconditions:
     99  * -	t	curthread
    100  * -	code	contains the syscall number
        *		(must not live in %ecx or %edi: both are overwritten
        *		before code is compared against NSYSCALL)
    101  * Postconditions:
    102  * -	%ecx and %edi are smashed
    103  * -	condition code flag ZF is cleared if pre-sys is too complex
    104  */
    105 #define	CHECK_PRESYS_NE(t, code)		\
    106 	movzbl	T_PRE_SYS(t), %edi;		/* %edi = t->t_pre_sys */ \
    107 	movzwl	T_PROC_FLAG(t), %ecx;		\
    108 	andl	$TP_WATCHPT, %ecx;		\
    109 	orl	%ecx, %edi;			/* |= t_proc_flag & TP_WATCHPT */ \
    110 	cmpl	$NSYSCALL, code;		\
    111 	setae	%cl;				/* unsigned: code >= NSYSCALL */ \
    112 	movzbl	%cl, %ecx;			\
    113 	orl	%ecx, %edi;			/* sets ZF for the caller's jne */ \
    114 	ORL_SYSCALLTRACE(%edi)
    115 
    116 /*
    117  * Check if a brand_mach_ops callback is defined for the specified callback_id
    118  * type.  If so invoke it with the kernel's %gs value loaded and the following
    119  * data on the stack:
    120  *	   --------------------------------------
    121  *         | 'scratch space'			|
    122  *         | user's %ebx			|
    123  *         | user's %gs selector		|
    124  *    |    | kernel's %gs selector		|
    125  *    |    | lwp pointer			|
    126  *    v    | user return address		|
    127  *         | callback wrapper return addr 	|
    128  *         --------------------------------------
    129  *
    130  * The lx brand (at least) uses each of these fields.
    131  * If the brand code returns, we assume that we are meant to execute the
    132  * normal system call path.
        *
        * Notes for readers of the macro body:
        * -	On entry the word at the top of the stack is taken to be the user
        *	return address; once the five save-area words have been pushed it
        *	is found at 20(%esp) and a copy of it is pushed for the callback.
        * -	Save-area offsets before that copy is pushed:
        *	0 lwp, 4 kernel %gs, 8 user %gs, 12 user %ebx, 16 scratch.
        * -	The scratch word holds the callback address so that %ebx can be
        *	restored to the user's value before the indirect call.
        * -	If the macro falls through, %ebx, %gs and %esp are back to their
        *	entry values; the condition flags are not preserved.
        * -	Uses local label "1".
    133  */
    134 #define	BRAND_CALLBACK(callback_id)					    \
    135 	subl	$4, %esp		/* save some scratch space	*/ ;\
    136 	pushl	%ebx			/* save %ebx to use for scratch	*/ ;\
    137 	pushl	%gs			/* save the user %gs		*/ ;\
    138 	movl	$KGS_SEL, %ebx						   ;\
    139 	pushl	%ebx			/* push kernel's %gs		*/ ;\
    140 	movw	%bx, %gs		/* switch to the kernel's %gs	*/ ;\
    141 	movl	%gs:CPU_THREAD, %ebx	/* load the thread pointer	*/ ;\
    142 	movl	T_LWP(%ebx), %ebx	/* load the lwp pointer		*/ ;\
    143 	pushl	%ebx			/* push the lwp pointer		*/ ;\
    144 	movl	LWP_PROCP(%ebx), %ebx	/* load the proc pointer	*/ ;\
    145 	movl	P_BRAND(%ebx), %ebx	/* load the brand pointer	*/ ;\
    146 	movl	B_MACHOPS(%ebx), %ebx	/* load the machops pointer	*/ ;\
    147 	movl	_CONST(_MUL(callback_id, CPTRSIZE))(%ebx), %ebx		   ;\
    148 	cmpl	$0, %ebx		/* no callback registered?	*/ ;\
    149 	je	1f							   ;\
    150 	movl	%ebx, 16(%esp)		/* save callback to scratch	*/ ;\
    151 	movl	12(%esp), %ebx		/* restore %ebx			*/ ;\
    152 	pushl	20(%esp)		/* push the return address	*/ ;\
    153 	call	*20(%esp)		/* call callback		*/ ;\
    154 	addl	$4, %esp		/* get rid of ret addr		*/ ;\
    155 1:	movl	8(%esp), %ebx		/* grab the user's %gs		*/ ;\
    156 	movw	%bx, %gs		/* restore the user %gs		*/ ;\
    157 	movl	12(%esp), %ebx		/* restore user's %ebx		*/ ;\
    158 	addl	$20, %esp		/* restore stack ptr		*/
    159 
        /*
         * Assembler spelling of the C call
         *
         *	syscall_mstate(prev, next);
         *
         * Both arguments are pushed as immediates, last argument first, and the
         * eight bytes of arguments are popped again after the call.  Any
         * register the callee may clobber (callers in this file protect %eax)
         * has to be saved by the user of the macro.
         */
    160 #define	MSTATE_TRANSITION(prev, next)		\
    161 	pushl	$next;				\
    162 	pushl	$prev;				\
    163 	call	syscall_mstate;			\
    164 	addl	$8, %esp
    165 
    166 /*
    167  * aka CPU_STATS_ADDQ(CPU, sys.syscall, 1)
    168  * This must be called with interrupts or preemption disabled.
        *
        * The counter is 64 bits wide and is bumped as two 32-bit halves through
        * the %gs-relative per-CPU area: add 1 to the low word, then fold the
        * carry into the high word.  The two instructions must stay adjacent and
        * in this order, since the adcl consumes the carry produced by the addl.
        * Condition flags are clobbered.
    169  */
    170 #define	CPU_STATS_SYS_SYSCALL_INC			\
    171 	addl	$1, %gs:CPU_STATS_SYS_SYSCALL;		\
    172 	adcl	$0, %gs:CPU_STATS_SYS_SYSCALL+4;
    173 
    174 #if !defined(__lint)
    175 
    176 /*
    177  * ASSERT(lwptoregs(lwp) == rp);
    178  *
    179  * this may seem obvious, but very odd things happen if this
    180  * assertion is false
    181  *
        * Arguments:
        *	t	register holding the thread pointer
        *	rp	register holding the expected regs pointer; must not be
        *		%esi or %edi, which are overwritten before rp is compared
        *
        * On mismatch the macro calls
        *	panic(__lwptoregs_msg, __LINE__, lwp, lwp->lwp_regs, rp)
        * Uses local label "7".  On non-DEBUG kernels it expands to nothing.
        *
    182  * Preconditions:
    183  *	-none-
    184  * Postconditions (if assertion is true):
    185  *	%esi and %edi are smashed
    186  */
    187 #if defined(DEBUG)
    188 
        /* panic format string: line number, lwp, lwp's regs pointer, rp */
    189 __lwptoregs_msg:
    190 	.string	"syscall_asm.s:%d lwptoregs(%p) [%p] != rp [%p]"
    191 
    192 #define	ASSERT_LWPTOREGS(t, rp)				\
    193 	movl	T_LWP(t), %esi;				/* %esi = lwp */ \
    194 	movl	LWP_REGS(%esi), %edi;			/* %edi = lwp's regs */ \
    195 	cmpl	rp, %edi;				\
    196 	je	7f;					/* assertion holds */ \
    197 	pushl	rp;					/* args pushed last-first */ \
    198 	pushl	%edi;					\
    199 	pushl	%esi;					\
    200 	pushl	$__LINE__;				\
    201 	pushl	$__lwptoregs_msg;			\
    202 	call	panic;					\
    203 7:
    204 #else
    205 #define	ASSERT_LWPTOREGS(t, rp)
    206 #endif
    207 
    208 #endif	/* __lint */
    209 
    210 /*
    211  * This is an assembler version of this fragment:
    212  *
    213  * lwp->lwp_state = LWP_SYS;
    214  * lwp->lwp_ru.sysc++;
    215  * lwp->lwp_eosys = NORMALRETURN;
    216  * lwp->lwp_ap = argp;
    217  *
        * Arguments:
        *	lwp	register holding the lwp pointer
        *	argp	32-bit register holding the argument pointer to store
        *
        * lwp_ru.sysc is incremented as a 64-bit quantity (addl on the low
        * word, adcl of the carry into the high word), so the condition flags
        * are clobbered.  No general-purpose register is modified.
        *
    218  * Preconditions:
    219  *	-none-
    220  * Postconditions:
    221  *	-none-
    222  */
    223 #define	SET_LWP(lwp, argp)				\
    224 	movb	$LWP_SYS, LWP_STATE(lwp);		\
    225 	addl	$1, LWP_RU_SYSC(lwp);			/* low word of sysc */ \
    226 	adcl	$0, LWP_RU_SYSC+4(lwp);			/* carry into high word */ \
    227 	movb	$NORMALRETURN, LWP_EOSYS(lwp);		\
    228 	movl	argp, LWP_AP(lwp)
    229 
    230 /*
    231  * Set up the thread, lwp, find the handler, and copy
    232  * in the arguments from userland to the kernel stack.
    233  *
        * Arguments:
        *	t		register holding curthread
        *	faultlabel	label installed in t_lofault while the user
        *			arguments are being copied
        *
    234  * Preconditions:
    235  * -	%eax contains the syscall number
        * -	%esp points at the saved register frame (the user stack pointer
        *	is read from REGOFF_UESP relative to it)
    236  * Postconditions:
    237  * -	%eax contains a pointer to the sysent structure
    238  * -	%ecx is zeroed
    239  * -	%esi, %edi are smashed
    240  * -	%esp is SYS_DROPped ready for the syscall
        *
        * Uses local label "4".
    241  */
    242 #define	SIMPLE_SYSCALL_PRESYS(t, faultlabel)		\
    243 	movl	T_LWP(t), %esi;				\
    244 	movw	%ax, T_SYSNUM(t);			/* t->t_sysnum = code */ \
    245 	subl	$SYS_DROP, %esp;			/* room for the args */ \
    246 	shll	$SYSENT_SIZE_SHIFT, %eax;			/* code -> table offset */ \
    247 	SET_LWP(%esi, %esp);				\
    248 	leal	sysent(%eax), %eax;			/* %eax = &sysent[code] */ \
    249 	movzbl	SY_NARG(%eax), %ecx;			/* %ecx = argument count */ \
    250 	testl	%ecx, %ecx;				\
    251 	jz	4f;					/* nothing to copy */ \
    252 	movl	%esp, %edi;				/* destination: kernel stack */ \
    253 	movl	SYS_DROP + REGOFF_UESP(%esp), %esi;	/* source: user %esp */ \
    254 	movl	$faultlabel, T_LOFAULT(t);		/* catch faults in the copy */ \
    255 	addl	$4, %esi;				/* skip one user stack word (presumably the return address) */ \
    256 	rep;						\
    257 	  smovl;					/* copy %ecx 32-bit words */ \
    258 	movl	%ecx, T_LOFAULT(t);			/* %ecx is 0: clear t_lofault */ \
    259 4:
    260 
    261 /*
    262  * Check to see if a simple return is possible i.e.
    263  *
    264  *	if ((t->t_post_sys_ast | syscalltrace) != 0)
    265  *		do full version;
    266  *
        * Arguments:
        *	t	register holding curthread
        *	rtmp	32-bit scratch register used to build the result
        *
        * Callers follow the macro with "jne <full version>".
        *
    267  * Preconditions:
    268  * -	t is curthread
    269  * Postconditions:
    270  * -	condition code NE is set if post-sys is too complex
    271  * -	rtmp is zeroed if it isn't (we rely on this!)
    272  */
    273 #define	CHECK_POSTSYS_NE(t, rtmp)			\
    274 	xorl	rtmp, rtmp;				\
    275 	ORL_SYSCALLTRACE(rtmp);				\
    276 	orl	T_POST_SYS_AST(t), rtmp;		\
    277 	cmpl	$0, rtmp
    278 
    279 /*
    280  * Fix up the lwp, thread, and eflags for a successful return
    281  *
        * Roughly:
        *	t->t_sysnum = 0;
        *	lwp->lwp_state = LWP_USER;
        *	saved user eflags &= ~PS_C;
        *
        * Arguments:
        *	t	register holding curthread
        *	zwreg	16-bit register that currently holds zero
        *
    282  * Preconditions:
    283  * -	zwreg contains zero
        * -	%esp points at the SYS_DROPped argument area
    284  * Postconditions:
    285  * -	%esp has been unSYS_DROPped
    286  * -	%esi is smashed (points to lwp)
        *
        * PS_C is cleared with a byte-wide AND on the low byte of the saved
        * EFLAGS, so the mask is built as an 8-bit constant.  A 16-bit mask
        * (0xffff - PS_C) on andb only works if the assembler silently
        * truncates the immediate.
    287  */
    288 #define	SIMPLE_SYSCALL_POSTSYS(t, zwreg)		\
    289 	movl	T_LWP(t), %esi;				\
    290 	addl	$SYS_DROP, %esp;			\
    291 	movw	zwreg, T_SYSNUM(t);			\
    292 	movb	$LWP_USER, LWP_STATE(%esi);		\
    293 	andb	$_CONST(0xff - PS_C), REGOFF_EFL(%esp)
    294 
    295 /*
    296  * System call handler.  This is the destination of both the call
    297  * gate (lcall 0x27) _and_ the interrupt gate (int 0x91). For our purposes,
    298  * there are two significant differences between an interrupt gate and a call
    299  * gate:
    300  *
    301  * 1) An interrupt gate runs the handler with interrupts disabled, whereas a
    302  * call gate runs the handler with whatever EFLAGS settings were in effect at
    303  * the time of the call.
    304  *
    305  * 2) An interrupt gate pushes the contents of the EFLAGS register at the time
    306  * of the interrupt onto the stack, whereas a call gate does not.
    307  *
    308  * Because we use the following code sequence to handle system calls made from
    309  * _both_ a call gate _and_ an interrupt gate, these two differences must be
    310  * respected. In regards to number 1) above, the handler must ensure that a sane
    311  * EFLAGS snapshot is stored on the stack so that when the kernel returns back
    312  * to the user via iret (which returns to user with the EFLAGS value saved on
    313  * the stack), interrupts are re-enabled.
    314  *
    315  * In regards to number 2) above, the handler must always put a current snapshot
    316  * of EFLAGS onto the stack in the appropriate place. If we came in via an
    317  * interrupt gate, we will be clobbering the EFLAGS value that was pushed by
    318  * the interrupt gate. This is OK, as the only bit that was changed by the
    319  * hardware was the IE (interrupt enable) bit, which for an interrupt gate is
    320  * now off. If we were to do nothing, the stack would contain an EFLAGS with
    321  * IE off, resulting in us eventually returning back to the user with interrupts
    322  * disabled. The solution is to turn on the IE bit in the EFLAGS value saved on
    323  * the stack.
    324  *
    325  * Another subtlety which deserves mention is the difference between the two
    326  * descriptors. The call gate descriptor is set to instruct the hardware to copy
    327  * one parameter from the user stack to the kernel stack, whereas the interrupt
    328  * gate descriptor doesn't use the parameter passing mechanism at all. The
    329  * kernel doesn't actually use the parameter that is copied by the hardware; the
    330  * only reason it does this is so that there is a space on the stack large
    331  * enough to hold an EFLAGS register value, which happens to be in the correct
    332  * place for use by iret when we go back to userland. How convenient.
    333  *
    334  * Stack frame description in syscall() and callees.
    335  *
    336  * |------------|
    337  * | regs	| +(8*4)+4	registers
    338  * |------------|
    339  * | 8 args	| <- %esp	MAXSYSARGS (currently 8) arguments
    340  * |------------|
    341  *
    342  */
    343 #define	SYS_DROP	_CONST(_MUL(MAXSYSARGS, 4))	/* bytes of stack reserved for MAXSYSARGS 4-byte args */
    344 
    345 #if defined(__lint)
    346 
    347 /*ARGSUSED*/
    348 void
    349 sys_call()
    350 {}
    351 
    352 void
    353 _allsyscalls()
    354 {}
    355 
    356 size_t _allsyscalls_size;
    357 
    358 #else	/* __lint */
    359 
        	/*
        	 * brand_sys_call first offers the call to the process's brand
        	 * (BRAND_CALLBACK); if that macro falls through, execution
        	 * continues straight into sys_call.  The second name given to
        	 * ENTRY_NP2, _allsyscalls, is the label that _allsyscalls_size
        	 * is measured from further down in this file.
        	 *
        	 * Register usage in the body:
        	 *	%eax	syscall number, then sysent pointer, then rval1
        	 *	%ebx	curthread
        	 *	%edx	rval2 once the handler has returned
        	 *
        	 * The labels _full_syscall_presys, _full_syscall_postsys and
        	 * _syscall_fault are also branched to from sys_sysenter.
        	 */
    360 	ENTRY_NP2(brand_sys_call, _allsyscalls)
    361 	BRAND_CALLBACK(BRAND_CB_SYSCALL)
    362 
    363 	ALTENTRY(sys_call)
    364 	/ on entry	eax = system call number
    365 
    366 	/ set up the stack to look as in reg.h
    367 	subl    $8, %esp        / pad the stack with ERRCODE and TRAPNO
    368 
    369 	SYSCALL_PUSH
    370 
    371 #ifdef TRAPTRACE
    372 	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSCALL) / Uses labels "8" and "9"
    373 	TRACE_REGS(%edi, %esp, %ebx, %ecx)	/ Uses label "9"
    374 	pushl	%eax
    375 	TRACE_STAMP(%edi)		/ Clobbers %eax, %edx, uses "9"
    376 	popl	%eax
    377 	movl	%eax, TTR_SYSNUM(%edi)
    378 #endif
    379 
        	/*
        	 * watch_syscall jumps here with %esp pointing at the thread's
        	 * saved registers and %eax reloaded with the syscall number.
        	 */
    380 _watch_do_syscall:
    381 	movl	%esp, %ebp
    382 
    383 	/ Interrupts may be enabled here, so we must make sure this thread
    384 	/ doesn't migrate off the CPU while it updates the CPU stats.
    385 	/
    386 	/ XXX This is only true if we got here via call gate thru the LDT for
    387 	/ old style syscalls. Perhaps this preempt++-- will go away soon?
    388 	movl	%gs:CPU_THREAD, %ebx
    389 	addb	$1, T_PREEMPT(%ebx)
    390 	CPU_STATS_SYS_SYSCALL_INC
    391 	subb	$1, T_PREEMPT(%ebx)
    392 
    393 	ENABLE_INTR_FLAGS
    394 
    395 	pushl	%eax				/ preserve across mstate call
    396 	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
    397 	popl	%eax
    398 
    399 	movl	%gs:CPU_THREAD, %ebx
    400 
    401 	ASSERT_LWPTOREGS(%ebx, %esp)
    402 
        	/* fast path unless CHECK_PRESYS_NE leaves ZF clear */
    403 	CHECK_PRESYS_NE(%ebx, %eax)
    404 	jne	_full_syscall_presys
    405 	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
    406 
        	/* %eax = sysent pointer; arguments sit at the top of the stack */
    407 _syslcall_call:
    408 	call	*SY_CALLC(%eax)
    409 
        	/* %eax/%edx = handler's return values */
    410 _syslcall_done:
    411 	CHECK_POSTSYS_NE(%ebx, %ecx)
    412 	jne	_full_syscall_postsys
        	/* %ecx is zero here, which SIMPLE_SYSCALL_POSTSYS relies on */
    413 	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
    414 	movl	%eax, REGOFF_EAX(%esp)
    415 	movl	%edx, REGOFF_EDX(%esp)
    416 
    417 	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
    418 
    419 	/
    420 	/ get back via iret
    421 	/
    422 	CLI(%edx)
    423 	jmp	sys_rtt_syscall
    424 
        	/*
        	 * Slow pre-syscall path: syscall_entry(curthread, argp), where
        	 * argp is the SYS_DROP area just reserved ("pushl %esp" stores
        	 * the value %esp had before the push).  Whatever the call
        	 * leaves in %eax is used as the sysent pointer at
        	 * _syslcall_call.
        	 */
    425 _full_syscall_presys:
    426 	movl	T_LWP(%ebx), %esi
    427 	subl	$SYS_DROP, %esp
    428 	movb	$LWP_SYS, LWP_STATE(%esi)
    429 	pushl	%esp
    430 	pushl	%ebx
    431 	call	syscall_entry
    432 	addl	$8, %esp
    433 	jmp	_syslcall_call
    434 
        	/*
        	 * Slow post-syscall path: drop the argument area, then
        	 * syscall_exit(curthread, rval1, rval2).
        	 */
    435 _full_syscall_postsys:
    436 	addl	$SYS_DROP, %esp
    437 	pushl	%edx
    438 	pushl	%eax
    439 	pushl	%ebx
    440 	call	syscall_exit
    441 	addl	$12, %esp
    442 	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
    443 	jmp	_sys_rtt
    444 
        	/*
        	 * Installed as t_lofault by SIMPLE_SYSCALL_PRESYS while the user
        	 * arguments are copied in: set_errno(0xe), zero both return
        	 * values and rejoin the normal completion path.
        	 */
    445 _syscall_fault:
    446 	push	$0xe			/ EFAULT
    447 	call	set_errno
    448 	addl	$4, %esp
    449 	xorl	%eax, %eax		/ fake syscall_err()
    450 	xorl	%edx, %edx
    451 	jmp	_syslcall_done
    452 	SET_SIZE(sys_call)
    453 	SET_SIZE(brand_sys_call)
    454 
    455 #endif	/* __lint */
    456 
    457 /*
    458  * System call handler via the sysenter instruction
    459  *
    460  * Here's how syscall entry usually works (see sys_call for details).
    461  *
    462  * There, the caller (lcall or int) in userland has arranged that:
    463  *
    464  * -	%eax contains the syscall number
    465  * -	the user stack contains the args to the syscall
    466  *
    467  * Normally the lcall instruction into the call gate causes the processor
    468  * to push %ss, %esp, <top-of-stack>, %cs, %eip onto the kernel stack.
    469  * The sys_call handler then leaves space for r_trapno and r_err, and
    470  * pusha's {%eax, %ecx, %edx, %ebx, %esp, %ebp, %esi, %edi}, followed
    471  * by %ds, %es, %fs and %gs to capture a 'struct regs' on the stack.
    472  * Then the kernel sets %ds, %es and %gs to kernel selectors, and finally
    473  * extracts %efl and puts it into r_efl (which happens to live at the offset
    474  * that <top-of-stack> was copied into). Note that the value in r_efl has
    475  * the IF (interrupt enable) flag turned on. (The int instruction into the
    476  * interrupt gate does essentially the same thing, only instead of
    477  * <top-of-stack> we get eflags - see comment above.)
    478  *
    479  * In the sysenter case, things are a lot more primitive.
    480  *
    481  * The caller in userland has arranged that:
    482  *
    483  * -	%eax contains the syscall number
    484  * -	%ecx contains the user %esp
    485  * -	%edx contains the return %eip
    486  * -	the user stack contains the args to the syscall
    487  *
    488  * e.g.
    489  *	<args on the stack>
    490  *	mov	$SYS_callnum, %eax
    491  *	mov	$1f, %edx	/ return %eip
    492  *	mov	%esp, %ecx	/ return %esp
    493  *	sysenter
    494  * 1:
    495  *
    496  * Hardware and (privileged) initialization code have arranged that by
    497  * the time the sysenter instruction completes:
    498  *
    499  * - %eip is pointing to sys_sysenter (below).
    500  * - %cs and %ss are set to kernel text and stack (data) selectors.
    501  * - %esp is pointing at the lwp's stack
    502  * - Interrupts have been disabled.
    503  *
    504  * The task for the sysenter handler is:
    505  *
    506  * -	recreate the same regs structure on the stack and the same
    507  *	kernel state as if we'd come in on an lcall
    508  * -	do the normal work of a syscall
    509  * -	execute the system call epilogue, use sysexit to return to userland.
    510  *
    511  * Note that we are unable to return both "rvals" to userland with this
    512  * call, as %edx is used by the sysexit instruction.
    513  *
    514  * One final complication in this routine is its interaction with
    515  * single-stepping in a debugger.  For most of the system call mechanisms,
    516  * the CPU automatically clears the single-step flag before we enter the
    517  * kernel.  The sysenter mechanism does not clear the flag, so a user
    518  * single-stepping through a libc routine may suddenly find him/herself
    519  * single-stepping through the kernel.  To detect this, kmdb compares the
    520  * trap %pc to the [brand_]sys_sysenter addresses on each single-step trap.
    521  * If it finds that we have single-stepped to a sysenter entry point, it
    522  * explicitly clears the flag and executes the sys_sysenter routine.
    523  *
    524  * One final complication in this final complication is the fact that we
    525  * have two different entry points for sysenter: brand_sys_sysenter and
    526  * sys_sysenter.  If we enter at brand_sys_sysenter and start single-stepping
    527  * through the kernel with kmdb, we will eventually hit the instruction at
    528  * sys_sysenter.  kmdb cannot distinguish between that valid single-step
    529  * and the undesirable one mentioned above.  To avoid this situation, we
    530  * simply add a jump over the instruction at sys_sysenter to make it
    531  * impossible to single-step to it.
    532  */
    533 #if defined(__lint)
    534 
    535 void
    536 sys_sysenter()
    537 {}
    538 
    539 #else	/* __lint */
    540 
    541 	ENTRY_NP(brand_sys_sysenter)
        	/*
        	 * BRAND_CALLBACK treats the word on top of the stack as the user
        	 * return address.  sysenter pushes nothing, so park the return
        	 * %eip (userland passes it in %edx) there for the callback and
        	 * take it back afterwards.
        	 */
    542 	pushl	%edx
    543 	BRAND_CALLBACK(BRAND_CB_SYSENTER)
    544 	popl	%edx
    545 	/*
    546 	 * Jump over sys_sysenter to allow single-stepping as described
    547 	 * above.
        	 *
        	 * The jump is unconditional on purpose.  A conditional branch
        	 * would depend on whatever flags the stack adjustment at the
        	 * end of BRAND_CALLBACK happened to leave behind, and a
        	 * fall-through would single-step onto sys_sysenter itself.
    548 	 */
    549 	jmp	1f
    550 
    551 	ALTENTRY(sys_sysenter)
    552 	nop
    553 1:
    554 	/
    555 	/ do what the call gate would've done to the stack ..
    556 	/
    557 	pushl	$UDS_SEL	/ (really %ss, but it's the same ..)
    558 	pushl	%ecx		/ userland makes this a copy of %esp
    559 	pushfl
    560 	orl	$PS_IE, (%esp)	/ turn interrupts on when we return to user
    561 	pushl	$UCS_SEL
    562 	pushl	%edx		/ userland makes this a copy of %eip
    563 	/
    564 	/ done.  finish building the stack frame
    565 	/
    566 	subl	$8, %esp	/ leave space for ERR and TRAPNO
    567 
    568 	SYSENTER_PUSH
    569 
    570 #ifdef TRAPTRACE
    571 	TRACE_PTR(%edi, %ebx, %ebx, %ecx, $TT_SYSENTER)	/ uses labels 8 and 9
    572 	TRACE_REGS(%edi, %esp, %ebx, %ecx)		/ uses label 9
    573 	pushl	%eax
    574 	TRACE_STAMP(%edi)		/ clobbers %eax, %edx, uses label 9
    575 	popl	%eax
    576 	movl	%eax, TTR_SYSNUM(%edi)
    577 #endif
    578 	movl	%esp, %ebp
    579 
        	/*
        	 * sysenter arrives with interrupts disabled, so unlike sys_call
        	 * the statistics update needs no t_preempt bracket.
        	 */
    580 	CPU_STATS_SYS_SYSCALL_INC
    581 
    582 	ENABLE_INTR_FLAGS
    583 
    584 	pushl	%eax				/ preserve across mstate call
    585 	MSTATE_TRANSITION(LMS_USER, LMS_SYSTEM)
    586 	popl	%eax
    587 
    588 	movl	%gs:CPU_THREAD, %ebx
    589 
    590 	ASSERT_LWPTOREGS(%ebx, %esp)
    591 
        	/*
        	 * The slow pre-syscall path and the argument-copy fault handler
        	 * are shared with sys_call.  Both continue at the _syslcall_*
        	 * labels, so calls that take them return through sys_call's
        	 * exit code rather than through the sysexit sequence below.
        	 */
    592 	CHECK_PRESYS_NE(%ebx, %eax)
    593 	jne	_full_syscall_presys
    594 	SIMPLE_SYSCALL_PRESYS(%ebx, _syscall_fault)
    595 
    596 _sysenter_call:
    597 	call	*SY_CALLC(%eax)
    598 
    599 _sysenter_done:
    600 	CHECK_POSTSYS_NE(%ebx, %ecx)
    601 	jne	_full_syscall_postsys
        	/* %ecx is zero here, which SIMPLE_SYSCALL_POSTSYS relies on */
    602 	SIMPLE_SYSCALL_POSTSYS(%ebx, %cx)
    603 	/
    604 	/ sysexit uses %edx to restore %eip, so we can't use it
    605 	/ to return a value, sigh.
    606 	/
    607 	movl	%eax, REGOFF_EAX(%esp)
    608 	/ movl	%edx, REGOFF_EDX(%esp)
    609 
    610 	/ Interrupts will be turned on by the 'sti' executed just before
    611 	/ sysexit. The following ensures that restoring the user's EFLAGS
    612 	/ doesn't enable interrupts too soon.
    613 	andl	$_BITNOT(PS_IE), REGOFF_EFL(%esp)
    614 
    615 	MSTATE_TRANSITION(LMS_SYSTEM, LMS_USER)
    616 
    617 	cli
    618 
    619 	SYSCALL_POP
    620 
        	/* unwind the words pushed at entry: %eip, %cs, eflags, %esp */
    621 	popl	%edx			/ sysexit: %edx -> %eip
    622 	addl	$4, %esp		/ get CS off the stack
    623 	popfl				/ EFL
    624 	popl	%ecx			/ sysexit: %ecx -> %esp
    625 	sti
    626 	sysexit
    627 	SET_SIZE(sys_sysenter)
    628 	SET_SIZE(brand_sys_sysenter)
    629 
    630 #endif	/* __lint */
    631 
    632 #if defined(__lint)
    633 /*
    634  * System call via an int80.  This entry point is only used by the Linux
    635  * application environment.  Unlike the sysenter path, there is no default
    636  * action to take if no callback is registered for this process.
    637  */
    638 void
    639 sys_int80()
    640 {}
    641 
    642 #else	/* __lint */
    643 
        	/*
        	 * brand_sys_int80 offers the trap to the process's brand; if
        	 * BRAND_CALLBACK falls through, sys_int80 turns the int into a
        	 * faked general protection fault.
        	 */
    644 	ENTRY_NP(brand_sys_int80)
    645 	BRAND_CALLBACK(BRAND_CB_INT80)
    646 
    647 	ALTENTRY(sys_int80)
    648 	/*
    649 	 * We hit an int80, but this process isn't of a brand with an int80
    650 	 * handler.  Bad process!  Make it look as if the INT failed.
    651 	 * Modify %eip to point before the INT, push the expected error
    652 	 * code and fake a GP fault.
    653 	 *
        	 * The error code is the gate's byte offset in the descriptor
        	 * table (T_INT80 * GATE_DESC_SIZE) plus 2.
        	 * NOTE(review): the +2 presumably sets the "refers to IDT" bit
        	 * of a selector-style error code -- confirm.
    654 	 */
    655 	subl	$2, (%esp)	/* int insn 2-bytes */
    656 	pushl	$_CONST(_MUL(T_INT80, GATE_DESC_SIZE) + 2)
    657 	jmp	gptrap			/ GP fault
    658 	SET_SIZE(sys_int80)
    659 	SET_SIZE(brand_sys_int80)
    660 
    661 /*
    662  * Declare a uintptr_t which covers the entire pc range of syscall
    663  * handlers for the stack walkers that need this.
        *
        * The value is the distance from _allsyscalls (the start of
        * brand_sys_call) to this point, so every handler that should be
        * covered has to be assembled between those two places.
    664  */
    665 	.align	CPTRSIZE
    666 	.globl	_allsyscalls_size
    667 	.type	_allsyscalls_size, @object
    668 _allsyscalls_size:
    669 	.NWORD	. - _allsyscalls
    670 	SET_SIZE(_allsyscalls_size)
    671 
    672 #endif	/* __lint */
    673 
    674 /*
    675  * These are the thread context handlers for lwps using sysenter/sysexit.
    676  */
    677 
    678 #if defined(__lint)
    679 
    680 /*ARGSUSED*/
    681 void
    682 sep_save(void *ksp)
    683 {}
    684 
    685 /*ARGSUSED*/
    686 void
    687 sep_restore(void *ksp)
    688 {}
    689 
    690 #else	/* __lint */
    691 
    692 	/*
    693 	 * sep_save: called as this lwp is switched away.  Writes zero to
    694 	 * the sysenter stack-pointer MSR so that a sysenter issued by an
    695 	 * lwp that (somehow) was never sep_restore'd starts with a NULL
    696 	 * stack pointer instead of silently corrupting the stack of
    697 	 * another (preempted) thread.
        	 *
        	 * wrmsr operands: %ecx = MSR number, %edx:%eax = 64-bit value.
        	 */
    698 	ENTRY_NP(sep_save)
    699 	movl	$MSR_INTC_SEP_ESP, %ecx		/* which MSR */
    700 	xorl	%eax, %eax			/* low word  = 0 */
    701 	xorl	%edx, %edx			/* high word = 0 */
    702 	wrmsr
    703 	ret
    704 	SET_SIZE(sep_save)
    705 
    706 	/*
    707 	 * sep_restore: called as this lwp resumes on a cpu.  Loads the
        	 * sysenter stack-pointer MSR with the lwp's kernel stack pointer,
        	 * which is the single argument found at 4(%esp).
    708 	 */
    709 	ENTRY_NP(sep_restore)
    710 	movl	$MSR_INTC_SEP_ESP, %ecx		/* which MSR */
    711 	xorl	%edx, %edx			/* high word = 0 */
    712 	movl	4(%esp), %eax			/* low word  = per-lwp kernel sp */
    713 	wrmsr
    714 	ret
    715 	SET_SIZE(sep_restore)
    716 
    717 #endif	/* __lint */
    718 
    719 /*
    720  * Call syscall().  Called from trap() on watchpoint at lcall 0,7
    721  */
    722 
    723 #if defined(__lint)
    724 
    725 void
    726 watch_syscall(void)
    727 {}
    728 
    729 #else	/* __lint */
    730 
        	/*
        	 * Re-enter the system call path for the current thread: move
        	 * %esp to the thread's t_stack, reload the syscall number from
        	 * the %eax slot of the registers found there, and join sys_call
        	 * at _watch_do_syscall (just past the point where sys_call
        	 * builds the register frame).  Does not return to its caller.
        	 *
        	 * NOTE(review): CLI(%eax) presumably disables interrupts using
        	 * %eax as scratch; %eax is reloaded before the jump -- confirm
        	 * against the CLI macro definition.
        	 */
    731 	ENTRY_NP(watch_syscall)
    732 	CLI(%eax)
    733 	movl	%gs:CPU_THREAD, %ebx
    734 	movl	T_STACK(%ebx), %esp		/ switch to the thread stack
    735 	movl	REGOFF_EAX(%esp), %eax		/ recover original syscall#
    736 	jmp	_watch_do_syscall
    737 	SET_SIZE(watch_syscall)
    738 
    739 #endif	/* __lint */
    740