Home | History | Annotate | Download | only in amd64
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #if	defined(lint)
     30 
     31 #include	<sys/types.h>
     32 #include	<_rtld.h>
     33 #include	<_audit.h>
     34 #include	<_elf.h>
     35 #include	<sys/regset.h>
     36 
     37 /* ARGSUSED0 */
     38 int
     39 elf_plt_trace()
     40 {
     41 	return (0);	/* lint-only stub; the real routine is the assembly in the #else branch */
     42 }
     43 #else
     44 
     45 #include	<link.h>
     46 #include	<_audit.h>
     47 #include	<sys/asm_linkage.h>
     48 
     49 	.file	"boot_elf.s"
     50 	.text
     51 
     52 /*
     53  * On entry the 'glue code' has already done the following:
     54  *
     55  *	pushq	%rbp
     56  *	movq	%rsp, %rbp
     57  *	subq	$0x10, %rsp
     58  *	leaq	trace_fields(%rip), %r11
     59  *	movq	%r11, -0x8(%rbp)
     60  *	movq	$elf_plt_trace, %r11
     61  *	jmp	*%r11
     62  *
     63  * so - -8(%rbp) contains the dyndata ptr
     64  *
     65  *	0x0	Addr		*reflmp
     66  *	0x8	Addr		*deflmp
     67  *	0x10	Word		symndx
     68  *	0x14	Word		sb_flags
     69  *	0x18	Sym		symdef.st_name
     70  *	0x1c			symdef.st_info
     71  *	0x1d			symdef.st_other
     72  *	0x1e			symdef.st_shndx
     73  *	0x20			symdef.st_value
     74  *	0x28			symdef.st_size
     75  *
     76  * Also note - on entry 16 bytes have already been subtracted
     77  * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
     78  * the second 8 bytes are to align the stack and are available
     79  * for use.
     80  */
     81 #define	REFLMP_OFF		0x0
     82 #define	DEFLMP_OFF		0x8
     83 #define	SYMNDX_OFF		0x10
     84 #define	SBFLAGS_OFF		0x14
     85 #define	SYMDEF_OFF		0x18
     86 #define	SYMDEF_VALUE_OFF	0x20
     87 /*
     88  * Local stack space storage for elf_plt_trace is allocated
     89  * as follows:
     90  *
     91  *  First - before we got here - %rsp has been decremented
     92  *  by 0x10 to make space for the dyndata ptr (and another
     93  *  free word).  In addition to that, we create space
     94  *  for the following:
     95  *
     96  *	La_amd64_regs	    8 * 8:	64
     97  *	prev_stack_size	    8		 8
     98  *	Saved regs:
     99  *	    %rdi			 8
    100  *	    %rsi			 8
    101  *	    %rdx			 8
    102  *	    %rcx			 8
    103  *	    %r8				 8
    104  *	    %r9				 8
    105  *	    %r10			 8
    106  *	    %r11			 8
    107  *	    %rax			 8
    108  *				    =======
    109  *			    Subtotal:	144 (16byte aligned)
    110  *
    111  *	Saved Media Regs (used to pass floating point args):
    112  *	    %xmm0 - %xmm7   16 * 8:	128
    113  *				    =======
    114  *			    Total:	272 (16byte aligned)
    115  *
    116  *  So - will subtract the following to create enough space
    117  *
    118  *	-8(%rbp)	store dyndata ptr
    119  *	-16(%rbp)	store call destination
    120  *	-80(%rbp)	space for La_amd64_regs
    121  *	-88(%rbp)	prev stack size
    122  *  The next %rbp offsets are only true if the caller had correct stack
    123  *  alignment.  See note above SPRDIOFF for why we use %rsp alignment to
    124  *  access these stack fields.
    125  *	-96(%rbp)	entering %rdi
    126  *	-104(%rbp)	entering %rsi
    127  *	-112(%rbp)	entering %rdx
    128  *	-120(%rbp)	entering %rcx
    129  *	-128(%rbp)	entering %r8
    130  *	-136(%rbp)	entering %r9
    131  *	-144(%rbp)	entering %r10
    132  *	-152(%rbp)	entering %r11
    133  *	-160(%rbp)	entering %rax
    134  *	-176(%rbp)	entering %xmm0
    135  *	-192(%rbp)	entering %xmm1
    136  *	-208(%rbp)	entering %xmm2
    137  *	-224(%rbp)	entering %xmm3
    138  *	-240(%rbp)	entering %xmm4
    139  *	-256(%rbp)	entering %xmm5
    140  *	-272(%rbp)	entering %xmm6
    141  *	-288(%rbp)	entering %xmm7
    142  *
    143  */
    144 #define	SPDYNOFF    -8
    145 #define	SPDESTOFF   -16
    146 #define	SPLAREGOFF  -80
    147 #define	SPPRVSTKOFF -88
    148 
    149 /*
    150  * The next set of offsets are relative to %rsp.
    151  * We guarantee %rsp is ABI compliant 16-byte aligned.  This guarantees the
    152  * xmm registers are saved to 16-byte aligned addresses.
    153  * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code.
    154  */
    155 #define	SPRDIOFF	192
    156 #define	SPRSIOFF	184
    157 #define	SPRDXOFF	176
    158 #define	SPRCXOFF	168
    159 #define	SPR8OFF		160
    160 #define	SPR9OFF		152
    161 #define	SPR10OFF	144
    162 #define	SPR11OFF	136
    163 #define	SPRAXOFF	128
    164 #define	SPXMM0OFF	112
    165 #define	SPXMM1OFF	96
    166 #define	SPXMM2OFF	80
    167 #define	SPXMM3OFF	64
    168 #define	SPXMM4OFF	48
    169 #define	SPXMM5OFF	32
    170 #define	SPXMM6OFF	16
    171 #define	SPXMM7OFF	0
    172 
    173 	.globl	elf_plt_trace
    174 	.type	elf_plt_trace,@function
    175 	.align 16
    176 elf_plt_trace:
    177 	/*
    178 	 * PLT audit trampoline, entered by a jmp from the glue code.
    179 	 * Enforce ABI 16-byte stack alignment here: the next andq instruction
    180 	 * rounds %rsp down, i.e. if %rsp is 8 byte aligned it subtracts 8.
    181 	 */
    182 	andq    $-16, %rsp	/* enforce ABI 16-byte stack alignment */
    183 	subq	$272,%rsp	/ create some local storage
    184 
    185 	movq	%rdi, SPRDIOFF(%rsp)
    186 	movq	%rsi, SPRSIOFF(%rsp)
    187 	movq	%rdx, SPRDXOFF(%rsp)
    188 	movq	%rcx, SPRCXOFF(%rsp)
    189 	movq	%r8, SPR8OFF(%rsp)
    190 	movq	%r9, SPR9OFF(%rsp)
    191 	movq	%r10, SPR10OFF(%rsp)
    192 	movq	%r11, SPR11OFF(%rsp)
    193 	movq	%rax, SPRAXOFF(%rsp)
    194 	movdqa	%xmm0, SPXMM0OFF(%rsp)
    195 	movdqa	%xmm1, SPXMM1OFF(%rsp)
    196 	movdqa	%xmm2, SPXMM2OFF(%rsp)
    197 	movdqa	%xmm3, SPXMM3OFF(%rsp)
    198 	movdqa	%xmm4, SPXMM4OFF(%rsp)
    199 	movdqa	%xmm5, SPXMM5OFF(%rsp)
    200 	movdqa	%xmm6, SPXMM6OFF(%rsp)
    201 	movdqa	%xmm7, SPXMM7OFF(%rsp)
    202 
    203 	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
    204 	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
    205 	je	.start_pltenter			/ flag clear: call audit_pltenter
    206 	movq	SYMDEF_VALUE_OFF(%rax), %rdi
    207 	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
    208 	jmp	.end_pltenter
    209 
    210 .start_pltenter:
    211 	/*
    212 	 * save the entering argument registers into La_amd64_regs
    213 	 */
    214 	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
    215 	leaq	8(%rbp), %rdi
    216 	movq	%rdi, 0(%rsi)		/ la_rsp
    217 	movq	0(%rbp), %rdi
    218 	movq	%rdi, 8(%rsi)		/ la_rbp
    219 	movq	SPRDIOFF(%rsp), %rdi
    220 	movq	%rdi, 16(%rsi)		/ la_rdi
    221 	movq	SPRSIOFF(%rsp), %rdi
    222 	movq	%rdi, 24(%rsi)		/ la_rsi
    223 	movq	SPRDXOFF(%rsp), %rdi
    224 	movq	%rdi, 32(%rsi)		/ la_rdx
    225 	movq	SPRCXOFF(%rsp), %rdi
    226 	movq	%rdi, 40(%rsi)		/ la_rcx
    227 	movq	SPR8OFF(%rsp), %rdi
    228 	movq	%rdi, 48(%rsi)		/ la_r8
    229 	movq	SPR9OFF(%rsp), %rdi
    230 	movq	%rdi, 56(%rsi)		/ la_r9
    231 
    232 	/*
    233 	 * prepare for call to audit_pltenter()
    234 	 */
    235 	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
    236 	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
    237 	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
    238 	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
    239 	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
    240 	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
    241 	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
    242 	call	audit_pltenter@PLT
    243 	movq	%rax, SPDESTOFF(%rbp)		/ save calling address
    244 .end_pltenter:
    245 
    246 	/*
    247 	 * If *no* la_pltexit() routines exist
    248 	 * we do not need to keep the stack frame
    249 	 * before we call the actual routine.  Instead we
    250 	 * jump to it and remove our stack from the stack
    251 	 * at the same time.
    252 	 */
    253 	movl	audit_flags(%rip), %eax
    254 	andl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
    255 	cmpl	$0, %eax
    256 	je	.bypass_pltexit
    257 	/*
    258 	 * Has the *nopltexit* flag been set for this entry point
    259 	 */
    260 	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
    261 	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
    262 	je	.start_pltexit			/ flag clear: do pltexit processing
    263 
    264 .bypass_pltexit:
    265 	/*
    266 	 * No PLTEXIT processing required.
    267 	 */
    268 	movq	0(%rbp), %r11
    269 	movq	%r11, -8(%rbp)			/ move prev %rbp
    270 	movq	SPDESTOFF(%rbp), %r11		/ r11 == calling destination
    271 	movq	%r11, 0(%rbp)			/ store destination at top
    272 
    273 	/
    274 	/ Restore registers
    275 	/
    276 	movq	SPRDIOFF(%rsp), %rdi
    277 	movq	SPRSIOFF(%rsp), %rsi
    278 	movq	SPRDXOFF(%rsp), %rdx
    279 	movq	SPRCXOFF(%rsp), %rcx
    280 	movq	SPR8OFF(%rsp), %r8
    281 	movq	SPR9OFF(%rsp), %r9
    282 	movq	SPR10OFF(%rsp), %r10
    283 	movq	SPR11OFF(%rsp), %r11
    284 	movq	SPRAXOFF(%rsp), %rax
    285 	movdqa	SPXMM0OFF(%rsp), %xmm0
    286 	movdqa	SPXMM1OFF(%rsp), %xmm1
    287 	movdqa	SPXMM2OFF(%rsp), %xmm2
    288 	movdqa	SPXMM3OFF(%rsp), %xmm3
    289 	movdqa	SPXMM4OFF(%rsp), %xmm4
    290 	movdqa	SPXMM5OFF(%rsp), %xmm5
    291 	movdqa	SPXMM6OFF(%rsp), %xmm6
    292 	movdqa	SPXMM7OFF(%rsp), %xmm7
    293 
    294 	subq	$8, %rbp			/ adjust %rbp for 'ret'
    295 	movq	%rbp, %rsp			/
    296 	/*
    297 	 * At this point, after a little doctoring, we should
    298 	 * have the following on the stack:
    299 	 *
    300 	 *	16(%rsp):  ret addr
    301 	 *	8(%rsp):  dest_addr
    302 	 *	0(%rsp):  Previous %rbp
    303 	 *
    304 	 * So - we pop the previous %rbp, and then
    305 	 * ret to our final destination.
    306 	 */
    307 	popq	%rbp				/
    308 	ret					/ jmp to final destination
    309 						/ and clean up stack :)
    310 
    311 .start_pltexit:
    312 	/*
    313 	 * In order to call the destination procedure and then return
    314 	 * to audit_pltexit() for post analysis we must first grow
    315 	 * our stack frame and then duplicate the original callers
    316 	 * stack state.  This duplicates all of the arguments
    317 	 * that were to be passed to the destination procedure.
    318 	 */
    319 	movq	%rbp, %rdi			/
    320 	addq	$16, %rdi			/    %rdi = src
    321 	movq	(%rbp), %rdx			/
    322 	subq	%rdi, %rdx			/    %rdx == prev frame sz
    323 	/*
    324 	 * If audit_argcnt > 0 then we limit the number of
    325 	 * arguments that will be duplicated to audit_argcnt.
    326 	 *
    327 	 * If (prev_stack_size > (audit_argcnt * 8))
    328 	 *	prev_stack_size = audit_argcnt * 8;
    329 	 */
    330 	movl	audit_argcnt(%rip),%eax		/   %eax = audit_argcnt
    331 	cmpl	$0, %eax
    332 	jle	.grow_stack
    333 	leaq	(,%rax,8), %rax			/    %rax = audit_argcnt * 8
    334 	cmpq	%rax,%rdx
    335 	jle	.grow_stack
    336 	movq	%rax, %rdx
    337 	/*
    338 	 * Grow the stack and duplicate the arguments of the
    339 	 * original caller.
    340 	 *
    341 	 * We save %rsp in %r11 since we need to use the current rsp for
    342 	 * accessing the registers saved in our stack frame.
    343 	 */
    344 .grow_stack:
    345 	movq	%rsp, %r11
    346 	subq	%rdx, %rsp			/    grow the stack
    347 	movq	%rdx, SPPRVSTKOFF(%rbp)		/    -88(%rbp) == prev frame sz
    348 	movq	%rsp, %rcx			/    %rcx = dest
    349 	addq	%rcx, %rdx			/    %rdx == tail of dest
    350 .while_base:
    351 	cmpq	%rdx, %rcx			/   while (dest < tail of dest) {
    352 	jae	.end_while			/    (unsigned: these are addresses)
    353 	movq	(%rdi), %rsi
    354 	movq	%rsi,(%rcx)			/        *dest = *src
    355 	addq	$8, %rdi			/	 src++
    356 	addq	$8, %rcx			/        dest++
    357 	jmp	.while_base			/    }
    358 
    359 	/*
    360 	 * The above stack is now a duplicate of (up to prev frame sz
    361 	 * bytes of) the stack of the original calling procedure.
    362 	 */
    363 .end_while:
    364 	/
    365 	/ Restore registers using %r11 which contains our old %rsp value
    366 	/ before growing the stack.
    367 	/
    368 	movq	SPRDIOFF(%r11), %rdi
    369 	movq	SPRSIOFF(%r11), %rsi
    370 	movq	SPRDXOFF(%r11), %rdx
    371 	movq	SPRCXOFF(%r11), %rcx
    372 	movq	SPR8OFF(%r11), %r8
    373 	movq	SPR9OFF(%r11), %r9
    374 	movq	SPR10OFF(%r11), %r10
    375 	movq	SPRAXOFF(%r11), %rax
    376 	movdqa	SPXMM0OFF(%r11), %xmm0
    377 	movdqa	SPXMM1OFF(%r11), %xmm1
    378 	movdqa	SPXMM2OFF(%r11), %xmm2
    379 	movdqa	SPXMM3OFF(%r11), %xmm3
    380 	movdqa	SPXMM4OFF(%r11), %xmm4
    381 	movdqa	SPXMM5OFF(%r11), %xmm5
    382 	movdqa	SPXMM6OFF(%r11), %xmm6
    383 	movdqa	SPXMM7OFF(%r11), %xmm7
    384 	movq	SPR11OFF(%r11), %r11		/ restore %r11 last (it is the base)
    385 
    386 	/*
    387 	 * Call to destination function - we'll return here
    388 	 * for pltexit monitoring.
    389 	 */
    390 	call	*SPDESTOFF(%rbp)
    391 
    392 	addq	SPPRVSTKOFF(%rbp), %rsp	/ cleanup dupped stack
    393 
    394 	/
    395 	/ prepare for call to audit_pltexit()
    396 	/
    397 	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
    398 	movl	SYMNDX_OFF(%r11), %r8d		/ arg5 (symndx) - 32-bit Word
    399 	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
    400 	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
    401 	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
    402 	movq	%rax, %rdi			/ arg1 (returnval)
    403 	call	audit_pltexit@PLT
    404 
    405 	/*
    406 	 * Clean up after ourselves and return to the
    407 	 * original calling procedure.
    408 	 */
    409 
    410 	/
    411 	/ Restore registers
    412 	/
    413 	movq	SPRDIOFF(%rsp), %rdi
    414 	movq	SPRSIOFF(%rsp), %rsi
    415 	movq	SPRDXOFF(%rsp), %rdx
    416 	movq	SPRCXOFF(%rsp), %rcx
    417 	movq	SPR8OFF(%rsp), %r8
    418 	movq	SPR9OFF(%rsp), %r9
    419 	movq	SPR10OFF(%rsp), %r10
    420 	movq	SPR11OFF(%rsp), %r11
    421 	// %rax = audit_pltexit() result; NOTE(review): %rdx/%xmm0 reloads here discard other return regs
    422 	movdqa	SPXMM0OFF(%rsp), %xmm0
    423 	movdqa	SPXMM1OFF(%rsp), %xmm1
    424 	movdqa	SPXMM2OFF(%rsp), %xmm2
    425 	movdqa	SPXMM3OFF(%rsp), %xmm3
    426 	movdqa	SPXMM4OFF(%rsp), %xmm4
    427 	movdqa	SPXMM5OFF(%rsp), %xmm5
    428 	movdqa	SPXMM6OFF(%rsp), %xmm6
    429 	movdqa	SPXMM7OFF(%rsp), %xmm7
    430 
    431 	movq	%rbp, %rsp			/
    432 	popq	%rbp				/
    433 	ret					/ return to caller
    434 	.size	elf_plt_trace, .-elf_plt_trace
    435 #endif
    436 
    437 /*
    438  * We got here because a call to a function resolved to a procedure
    439  * linkage table entry.  That entry did a jmp to the first PLT entry (.PLT0),
    440  * which in turn did an indirect jmp through GOT[2] to elf_rtbndr.
    441  *
    442  * the code sequence that got us here was:
    443  *
    444  * .PLT0:
    445  *	pushq	GOT+8(%rip)	#GOT[1]
    446  *	jmp	*GOT+16(%rip)	#GOT[2]
    447  *	nop
    448  *	nop
    449  *	nop
    450  *	nop
    451  *	...
    452  * PLT entry for foo:
    453  *	jmp	*name1@GOTPCREL(%rip)
    454  *	pushl	$rel.plt.foo
    455  *	jmp	PLT0
    456  *
    457  * At entry, the stack looks like this:
    458  *
    459  *	return address			16(%rsp)
    460  *	$rel.plt.foo	(plt index)	8(%rsp)
    461  *	lmp				0(%rsp)
    462  *
    463  */
    464 #if defined(lint)
    465 
    466 extern unsigned long	elf_bndr(Rt_map *, unsigned long, caddr_t);
    467 
    468 void
    469 elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
    470 {
    471 	(void) elf_bndr(lmp, reloc, pc);	/* lint-only stub; result discarded here */
    472 }
    473 
    474 #else
    475 
    476 /*
    477  * The PLT code that landed us here placed 2 arguments on the stack as
    478  * arguments to elf_rtbndr.
    479  * Additionally the pc of caller is below these 2 args.
    480  * Our stack will look like this after we establish a stack frame with
    481  * push %rbp; movq %rsp, %rbp sequence:
    482  *
    483  *	8(%rbp)			arg1 - *lmp
    484  *	16(%rbp), %rsi		arg2 - reloc index
    485  *	24(%rbp), %rdx		arg3 - pc of caller
    486  */
    487 #define	LBPLMPOFF	8	/* arg1 - *lmp */
    488 #define	LBPRELOCOFF	16	/* arg2 - reloc index */
    489 #define	LBRPCOFF	24	/* arg3 - pc of caller */
    490 
    491 /*
    492  * Possible arguments for the resolved function are in registers as per
    493  * the AMD64 ABI.  We must save on the local stack all possible register
    494  * arguments before interposing functions to resolve the called function.
    495  * Possible arguments must be restored before invoking the resolved function.
    496  *
    497  * Local stack space storage for elf_rtbndr is allocated as follows:
    498  *
    499  *	Saved regs:
    500  *	    %rax			 8
    501  *	    %rdi			 8
    502  *	    %rsi			 8
    503  *	    %rdx			 8
    504  *	    %rcx			 8
    505  *	    %r8				 8
    506  *	    %r9				 8
    507  *	    %r10			 8
    508  *				    =======
    509  *			    Subtotal:   64 (16byte aligned)
    510  *
    511  *	Saved Media Regs (used to pass floating point args):
    512  *	    %xmm0 - %xmm7   16 * 8:    128
    513  *				    =======
    514  *			    Total:     192 (16byte aligned)
    515  *
    516  *  So - will subtract the following to create enough space
    517  *
    518  *	0(%rsp)		save %rax
    519  *	8(%rsp)		save %rdi
    520  *	16(%rsp)	save %rsi
    521  *	24(%rsp)	save %rdx
    522  *	32(%rsp)	save %rcx
    523  *	40(%rsp)	save %r8
    524  *	48(%rsp)	save %r9
    525  *	56(%rsp)	save %r10
    526  *	64(%rsp)	save %xmm0
    527  *	80(%rsp)	save %xmm1
    528  *	96(%rsp)	save %xmm2
    529  *	112(%rsp)	save %xmm3
    530  *	128(%rsp)	save %xmm4
    531  *	144(%rsp)	save %xmm5
    532  *	160(%rsp)	save %xmm6
    533  *	176(%rsp)	save %xmm7
    534  *
    535  * Note: Some callers may use 8-byte stack alignment instead of the
    536  * ABI required 16-byte alignment.  We use %rsp offsets to save/restore
    537  * registers because %rbp may not be 16-byte aligned.  We guarantee %rsp
    538  * is 16-byte aligned in the function preamble.
    539  */
    540 #define	LS_SIZE	$192	/* local stack space to save all possible arguments */
    541 #define	LSRAXOFF	0	/* for SSE register count */
    542 #define	LSRDIOFF	8	/* arg 0 ... */
    543 #define	LSRSIOFF	16
    544 #define	LSRDXOFF	24
    545 #define	LSRCXOFF	32
    546 #define	LSR8OFF		40
    547 #define	LSR9OFF		48
    548 #define	LSR10OFF	56	/* ... arg 5 */
    549 #define	LSXMM0OFF	64	/* SSE arg 0 ... */
    550 #define	LSXMM1OFF	80
    551 #define	LSXMM2OFF	96
    552 #define	LSXMM3OFF	112
    553 #define	LSXMM4OFF	128
    554 #define	LSXMM5OFF	144
    555 #define	LSXMM6OFF	160
    556 #define	LSXMM7OFF	176	/* ... SSE arg 7 */
    557 
    558 	.weak	_elf_rtbndr
    559 	_elf_rtbndr = elf_rtbndr
    560 
    561 	ENTRY(elf_rtbndr)
    562 
    563 	pushq	%rbp
    564 	movq	%rsp, %rbp
    565 
    566 	/*
    567 	 * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack
    568 	 * alignment.  Enforce ABI 16-byte stack alignment here.
    569 	 * The next andq instruction does this pseudo code:
    570 	 * If %rsp is 8 byte aligned then subtract 8 from %rsp.
    571 	 */
    572 	andq	$-16, %rsp	/* enforce ABI 16-byte stack alignment */
    573 
    574 	subq	LS_SIZE, %rsp	/* room to save all ABI defined argument registers */
    575 
    576 	movq	%rax, LSRAXOFF(%rsp)	/* for SSE register count */
    577 	movq	%rdi, LSRDIOFF(%rsp)	/*  arg 0 .. */
    578 	movq	%rsi, LSRSIOFF(%rsp)
    579 	movq	%rdx, LSRDXOFF(%rsp)
    580 	movq	%rcx, LSRCXOFF(%rsp)
    581 	movq	%r8, LSR8OFF(%rsp)
    582 	movq	%r9, LSR9OFF(%rsp)	/* .. arg 5 */
    583 	movq	%r10, LSR10OFF(%rsp)	/* call chain reg */
    584 
    585 	movdqa	%xmm0, LSXMM0OFF(%rsp)	/* SSE arg 0 ... */
    586 	movdqa	%xmm1, LSXMM1OFF(%rsp)
    587 	movdqa	%xmm2, LSXMM2OFF(%rsp)
    588 	movdqa	%xmm3, LSXMM3OFF(%rsp)
    589 	movdqa	%xmm4, LSXMM4OFF(%rsp)
    590 	movdqa	%xmm5, LSXMM5OFF(%rsp)
    591 	movdqa	%xmm6, LSXMM6OFF(%rsp)
    592 	movdqa	%xmm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 */
    593 
    594 	movq	LBPLMPOFF(%rbp), %rdi	/* arg1 - *lmp */
    595 	movq	LBPRELOCOFF(%rbp), %rsi	/* arg2 - reloc index */
    596 	movq	LBRPCOFF(%rbp), %rdx	/* arg3 - pc of caller */
    597 	call	elf_bndr@PLT		/* call elf_bndr(lmp, relndx, pc) */
    598 	movq	%rax, LBPRELOCOFF(%rbp)	/* store final destination over reloc index */
    599 
    600 	/* restore possible arguments before invoking resolved function */
    601 	movq	LSRAXOFF(%rsp), %rax
    602 	movq	LSRDIOFF(%rsp), %rdi
    603 	movq	LSRSIOFF(%rsp), %rsi
    604 	movq	LSRDXOFF(%rsp), %rdx
    605 	movq	LSRCXOFF(%rsp), %rcx
    606 	movq	LSR8OFF(%rsp), %r8
    607 	movq	LSR9OFF(%rsp), %r9
    608 	movq	LSR10OFF(%rsp), %r10
    609 
    610 	movdqa	LSXMM0OFF(%rsp), %xmm0
    611 	movdqa	LSXMM1OFF(%rsp), %xmm1
    612 	movdqa	LSXMM2OFF(%rsp), %xmm2
    613 	movdqa	LSXMM3OFF(%rsp), %xmm3
    614 	movdqa	LSXMM4OFF(%rsp), %xmm4
    615 	movdqa	LSXMM5OFF(%rsp), %xmm5
    616 	movdqa	LSXMM6OFF(%rsp), %xmm6
    617 	movdqa	LSXMM7OFF(%rsp), %xmm7
    618 
    619 	movq	%rbp, %rsp
    620 	popq	%rbp
    621 
    622 	addq	$8, %rsp	/* pop 1st plt-pushed arg (lmp) */
    623 				/* the second argument's slot is used */
    624 				/* for the 'return' address to our */
    625 				/* final destination */
    626 
    627 	ret			/* invoke resolved function */
    628 	.size 	elf_rtbndr, .-elf_rtbndr
    629 #endif
    630