OpenGrok

Cross Reference: aes_intel.s
xref: /onnv/onnv-gate/usr/src/common/crypto/aes/amd64/aes_intel.s
Home | History | Annotate | Line # | Download | only in amd64
      1 /*
      2  * ====================================================================
      3  * Written by Intel Corporation for the OpenSSL project to add support
      4  * for Intel AES-NI instructions. Rights for redistribution and usage
      5  * in source and binary forms are granted according to the OpenSSL
      6  * license.
      7  *
      8  *   Author: Huang Ying <ying.huang at intel dot com>
      9  *           Vinodh Gopal <vinodh.gopal at intel dot com>
     10  *           Kahraman Akdemir
     11  *
     12  * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
     13  * instructions that are going to be introduced in the next generation
     14  * of Intel processor, as of 2009. These instructions enable fast and
     15  * secure data encryption and decryption, using the Advanced Encryption
     16  * Standard (AES), defined by FIPS Publication number 197. The
     17  * architecture introduces six instructions that offer full hardware
     18  * support for AES. Four of them support high performance data
     19  * encryption and decryption, and the other two instructions support
     20  * the AES key expansion procedure.
     21  * ====================================================================
     22  */
     23 
     24 /*
     25  * ====================================================================
     26  * Copyright (c) 1998-2008 The OpenSSL Project.  All rights reserved.
     27  *
     28  * Redistribution and use in source and binary forms, with or without
     29  * modification, are permitted provided that the following conditions
     30  * are met:
     31  *
     32  * 1. Redistributions of source code must retain the above copyright
     33  *    notice, this list of conditions and the following disclaimer.
     34  *
     35  * 2. Redistributions in binary form must reproduce the above copyright
     36  *    notice, this list of conditions and the following disclaimer in
     37  *    the documentation and/or other materials provided with the
     38  *    distribution.
     39  *
     40  * 3. All advertising materials mentioning features or use of this
     41  *    software must display the following acknowledgment:
     42  *    "This product includes software developed by the OpenSSL Project
     43  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
     44  *
     45  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
     46  *    endorse or promote products derived from this software without
     47  *    prior written permission. For written permission, please contact
     48  *    openssl-core (at) openssl.org.
     49  *
     50  * 5. Products derived from this software may not be called "OpenSSL"
     51  *    nor may "OpenSSL" appear in their names without prior written
     52  *    permission of the OpenSSL Project.
     53  *
     54  * 6. Redistributions of any form whatsoever must retain the following
     55  *    acknowledgment:
     56  *    "This product includes software developed by the OpenSSL Project
     57  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
     60  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
     63  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     64  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     65  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     66  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     67  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     68  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     69  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
     70  * OF THE POSSIBILITY OF SUCH DAMAGE.
     71  * ====================================================================
     72  */
     73 
     74 /*
     75  * ====================================================================
     76  * OpenSolaris OS modifications
     77  *
     78  * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
     79  * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
     80  * Huang Ying of Intel to the openssl-dev mailing list under the subject
     81  * of "Add support to Intel AES-NI instruction set for x86_64 platform".
     82  *
     83  * This OpenSolaris version has these major changes from the original source:
     84  *
     85  * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
     86  * /usr/include/sys/asm_linkage.h, lint(1B) guards, EXPORT DELETE START
     87  * and EXPORT DELETE END markers, and dummy C function definitions for lint.
     88  *
     89  * 2. Formatted code, added comments, and added #includes and #defines.
     90  *
     91  * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
     92  * calling kpreempt_disable() and kpreempt_enable().
     93  * If the TS bit is not set, save and restore %xmm registers at the beginning
     94  * and end of function calls (%xmm* registers are not saved and restored
     95  * during kernel thread preemption).
     96  *
     97  * 4. Renamed functions, reordered parameters, and changed return value
     98  * to match OpenSolaris:
     99  *
    100  * OpenSSL interface:
    101  *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
    102  *		const int bits, AES_KEY *key);
    103  *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
    104  *		const int bits, AES_KEY *key);
    105  *	Return values for above are non-zero on error, 0 on success.
    106  *
    107  *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
    108  *		const AES_KEY *key);
    109  *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
    110  *		const AES_KEY *key);
    111  *	typedef struct aes_key_st {
    112  *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
    113  *		int		rounds;
    114  *		unsigned int	pad[3];
    115  *	} AES_KEY;
    116  * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
    117  * (ks32) instead of 64-bit (ks64)).
    118  * Number of rounds (aka round count) is at offset 240 of AES_KEY.
    119  *
    120  * OpenSolaris OS interface (#ifdefs removed for readability):
    121  *	int rijndael_key_setup_dec_intel(uint32_t rk[],
    122  *		const uint32_t cipherKey[], uint64_t keyBits);
    123  *	int rijndael_key_setup_enc_intel(uint32_t rk[],
    124  *		const uint32_t cipherKey[], uint64_t keyBits);
    125  *	Return values for above are 0 on error, number of rounds on success.
    126  *
    127  *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
    128  *		const uint32_t pt[4], uint32_t ct[4]);
    129  *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
    130  *		const uint32_t ct[4], uint32_t pt[4]);
    131  *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
    132  *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
    133  *
    134  *	typedef union {
    135  *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
    136  *	} aes_ks_t;
    137  *	typedef struct aes_key {
    138  *		aes_ks_t	encr_ks, decr_ks;
    139  *		long double	align128;
    140  *		int		flags, nr, type;
    141  *	} aes_key_t;
    142  *
    143  * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
    144  * ct is crypto text, and MAX_AES_NR is 14.
    145  * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
    146  *
    147  * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
    148  *
    149  * ====================================================================
    150  */
    151 
    152 #if defined(lint) || defined(__lint)
    153 
    154 #include <sys/types.h>
    155 
    156 /* ARGSUSED */
    157 void
    158 aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    159     uint32_t ct[4]) {
    160 }	/* empty lint-only stub; the real routine is the assembly version */
    161 /* ARGSUSED */
    162 void
    163 aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    164     uint32_t pt[4]) {
    165 }	/* empty lint-only stub; the real routine is the assembly version */
    166 /* ARGSUSED */
    167 int
    168 rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    169     uint64_t keyBits) {
    170 	return (0);	/* lint-only stub value */
    171 }
    172 /* ARGSUSED */
    173 int
    174 rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    175    uint64_t keyBits) {
    176 	return (0);	/* lint-only stub value */
    177 }
    178 
    179 
    180 #else	/* lint */
    181 
    182 #include <sys/asm_linkage.h>
    183 #include <sys/controlregs.h>
    184 #ifdef _KERNEL
    185 #include <sys/machprivregs.h>
    186 #endif
    187 
    188 #ifdef _KERNEL
    189 	/*
    190 	 * PROTECTED_CLTS: clear CR0.TS.  Under i86xpv the CLTS macro calls
    191 	 * HYPERVISOR_fpu_taskswitch(), which clobbers P2 (%rsi) when passing
    192 	 * P2 to syscall, so %rsi is saved and restored around CLTS there.
    193 	 * The STTS macro clobbers %rsi too, but we don't care if P2 (%rsi)
    194 	 * is modified just before function exit.
    195 	 * The CLTS and STTS macros push and pop P1 (%rdi) already.
    196 	 */
    197 #ifdef __xpv
    198 #define	PROTECTED_CLTS \
    199 	push	%rsi; \
    200 	CLTS; \
    201 	pop	%rsi
    202 #else	/* !__xpv: CLTS is used directly */
    203 #define	PROTECTED_CLTS \
    204 	CLTS
    205 #endif	/* __xpv */
    206 	/* CR0_TS clear: align stack, save %xmm0-%xmm1; else clear CR0_TS */
    207 #define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
    208 	push	%rbp; \
    209 	mov	%rsp, %rbp; \
    210 	movq	%cr0, tmpreg; \
    211 	testq	$CR0_TS, tmpreg; \
    212 	jnz	1f; \
    213 	and	$-XMM_ALIGN, %rsp; \
    214 	sub	$[XMM_SIZE * 2], %rsp; \
    215 	movaps	%xmm0, 16(%rsp); \
    216 	movaps	%xmm1, (%rsp); \
    217 	jmp	2f; \
    218 1: \
    219 	PROTECTED_CLTS; \
    220 2:
    221 
    222 	/*
    223 	 * If CR0_TS was not set in tmpreg, pop %xmm0 and %xmm1 off stack,
    224 	 * otherwise set CR0_TS (via STTS).  Then unwind the %rbp stack frame.
    225 	 */
    226 #define	SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
    227 	testq	$CR0_TS, tmpreg; \
    228 	jnz	1f; \
    229 	movaps	(%rsp), %xmm1; \
    230 	movaps	16(%rsp), %xmm0; \
    231 	jmp	2f; \
    232 1: \
    233 	STTS(tmpreg); \
    234 2: \
    235 	mov	%rbp, %rsp; \
    236 	pop	%rbp
    237 
    238 	/*
    239 	 * Save %cr0 in tmpreg.  If CR0_TS is not set, align the stack (after
    240 	 * push %rbp) and push %xmm0 - %xmm6 on stack, otherwise clear CR0_TS.
    241 	 */
    242 #define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
    243 	push	%rbp; \
    244 	mov	%rsp, %rbp; \
    245 	movq	%cr0, tmpreg; \
    246 	testq	$CR0_TS, tmpreg; \
    247 	jnz	1f; \
    248 	and	$-XMM_ALIGN, %rsp; \
    249 	sub	$[XMM_SIZE * 7], %rsp; \
    250 	movaps	%xmm0, 96(%rsp); \
    251 	movaps	%xmm1, 80(%rsp); \
    252 	movaps	%xmm2, 64(%rsp); \
    253 	movaps	%xmm3, 48(%rsp); \
    254 	movaps	%xmm4, 32(%rsp); \
    255 	movaps	%xmm5, 16(%rsp); \
    256 	movaps	%xmm6, (%rsp); \
    257 	jmp	2f; \
    258 1: \
    259 	PROTECTED_CLTS; \
    260 2:
    261 
    262 
    263 	/*
    264 	 * If CR0_TS was not set in tmpreg, pop %xmm0 - %xmm6 off stack,
    265 	 * otherwise set CR0_TS (via STTS).  Then unwind the %rbp stack frame.
    266 	 */
    267 #define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
    268 	testq	$CR0_TS, tmpreg; \
    269 	jnz	1f; \
    270 	movaps	(%rsp), %xmm6; \
    271 	movaps	16(%rsp), %xmm5; \
    272 	movaps	32(%rsp), %xmm4; \
    273 	movaps	48(%rsp), %xmm3; \
    274 	movaps	64(%rsp), %xmm2; \
    275 	movaps	80(%rsp), %xmm1; \
    276 	movaps	96(%rsp), %xmm0; \
    277 	jmp	2f; \
    278 1: \
    279 	STTS(tmpreg); \
    280 2: \
    281 	mov	%rbp, %rsp; \
    282 	pop	%rbp
    283 
    284 
    285 #else	/* !_KERNEL: the TS/%xmm save-restore macros expand to nothing */
    286 #define	PROTECTED_CLTS
    287 #define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
    288 #define	SET_TS_OR_POP_XMM0_XMM1(tmpreg)
    289 #define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
    290 #define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
    291 #endif	/* _KERNEL */
    292 
    293 
    294 /*
    295  * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
    296  * _key_expansion_256a(), _key_expansion_256b()
    297  *
    298  * Helper functions called by rijndael_key_setup_enc_intel().
    299  * Also used indirectly by rijndael_key_setup_dec_intel().
    300  *
    301  * Input:
    302  * %xmm0	User-provided cipher key
    303  * %xmm1	Round constant
    304  * Output:
    305  * (%rcx)	AES key
    306  */
    307 
    308 	/* EXPORT DELETE START */
    309 .align	16
    310 _key_expansion_128:
    311 _key_expansion_256a:
    312 	pshufd	$0b11111111, %xmm1, %xmm1	/* broadcast dword 3 of xmm1 */
    313 	shufps	$0b00010000, %xmm0, %xmm4	/* xmm4 is scratch; caller zeroes it */
    314 	pxor	%xmm4, %xmm0
    315 	shufps	$0b10001100, %xmm0, %xmm4
    316 	pxor	%xmm4, %xmm0
    317 	pxor	%xmm1, %xmm0			/* xmm0 = new round key */
    318 	movaps	%xmm0, (%rcx)			/* store it in the key schedule */
    319 	add	$0x10, %rcx			/* advance to next 16-byte slot */
    320 	ret
    321 	SET_SIZE(_key_expansion_128)
    322 	SET_SIZE(_key_expansion_256a)
    323 
    324 .align 16
    325 _key_expansion_192a:
    326 	pshufd	$0b01010101, %xmm1, %xmm1	/* broadcast dword 1 of xmm1 */
    327 	shufps	$0b00010000, %xmm0, %xmm4	/* xmm4 is scratch; caller zeroes it */
    328 	pxor	%xmm4, %xmm0
    329 	shufps	$0b10001100, %xmm0, %xmm4
    330 	pxor	%xmm4, %xmm0
    331 	pxor	%xmm1, %xmm0
    332 
    333 	movaps	%xmm2, %xmm5			/* xmm5, xmm6 = copies of xmm2 */
    334 	movaps	%xmm2, %xmm6
    335 	pslldq	$4, %xmm5			/* shift xmm5 left by 4 bytes */
    336 	pshufd	$0b11111111, %xmm0, %xmm3	/* broadcast dword 3 of xmm0 */
    337 	pxor	%xmm3, %xmm2
    338 	pxor	%xmm5, %xmm2
    339 
    340 	movaps	%xmm0, %xmm1
    341 	shufps	$0b01000100, %xmm0, %xmm6	/* xmm6 = (xmm6 low q, xmm0 low q) */
    342 	movaps	%xmm6, (%rcx)
    343 	shufps	$0b01001110, %xmm2, %xmm1	/* xmm1 = (xmm1 high q, xmm2 low q) */
    344 	movaps	%xmm1, 0x10(%rcx)
    345 	add	$0x20, %rcx			/* two 16-byte slots were stored */
    346 	ret
    347 	SET_SIZE(_key_expansion_192a)
    348 
    349 .align 16
    350 _key_expansion_192b:
    351 	pshufd	$0b01010101, %xmm1, %xmm1	/* broadcast dword 1 of xmm1 */
    352 	shufps	$0b00010000, %xmm0, %xmm4	/* xmm4 is scratch; caller zeroes it */
    353 	pxor	%xmm4, %xmm0
    354 	shufps	$0b10001100, %xmm0, %xmm4
    355 	pxor	%xmm4, %xmm0
    356 	pxor	%xmm1, %xmm0
    357 
    358 	movaps	%xmm2, %xmm5			/* xmm5 = copy of xmm2 */
    359 	pslldq	$4, %xmm5			/* shift xmm5 left by 4 bytes */
    360 	pshufd	$0b11111111, %xmm0, %xmm3	/* broadcast dword 3 of xmm0 */
    361 	pxor	%xmm3, %xmm2
    362 	pxor	%xmm5, %xmm2
    363 
    364 	movaps	%xmm0, (%rcx)			/* one 16-byte slot is stored */
    365 	add	$0x10, %rcx
    366 	ret
    367 	SET_SIZE(_key_expansion_192b)
    368 
    369 .align 16
    370 _key_expansion_256b:
    371 	pshufd	$0b10101010, %xmm1, %xmm1	/* broadcast dword 2 of xmm1 */
    372 	shufps	$0b00010000, %xmm2, %xmm4	/* xmm4 is scratch; caller zeroes it */
    373 	pxor	%xmm4, %xmm2
    374 	shufps	$0b10001100, %xmm2, %xmm4
    375 	pxor	%xmm4, %xmm2
    376 	pxor	%xmm1, %xmm2			/* xmm2 = new round key */
    377 	movaps	%xmm2, (%rcx)			/* store it in the key schedule */
    378 	add	$0x10, %rcx			/* advance to next 16-byte slot */
    379 	ret
    380 	SET_SIZE(_key_expansion_256b)
    381 	/* EXPORT DELETE END */
    382 
    383 
    384 /*
    385  * rijndael_key_setup_enc_intel()
    386  * Expand the cipher key into the encryption key schedule.
    387  *
    388  * For kernel code, caller is responsible for ensuring kpreempt_disable()
    389  * has been called.  This is because %xmm registers are not saved/restored.
    390  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
    391  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
    392  * on the stack.
    393  *
    394  * OpenSolaris interface:
    395  * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    396  *	uint64_t keyBits);
    397  * Return value is 0 on error, number of rounds on success.
    398  *
    399  * Original Intel OpenSSL interface:
    400  * int intel_AES_set_encrypt_key(const unsigned char *userKey,
    401  *	const int bits, AES_KEY *key);
    402  * Return value is non-zero on error, 0 on success.
    403  */
    404 
    405 #ifdef	OPENSSL_INTERFACE
    406 #define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
    407 #define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key
    408 
    409 #define	USERCIPHERKEY		rdi	/* P1, 64 bits */
    410 #define	KEYSIZE32		esi	/* P2, 32 bits */
    411 #define	KEYSIZE64		rsi	/* P2, 64 bits */
    412 #define	AESKEY			rdx	/* P3, 64 bits */
    413 
    414 #else	/* OpenSolaris Interface */
    415 #define	AESKEY			rdi	/* P1, 64 bits */
    416 #define	USERCIPHERKEY		rsi	/* P2, 64 bits */
    417 #define	KEYSIZE32		edx	/* P3, 32 bits */
    418 #define	KEYSIZE64		rdx	/* P3, 64 bits */
    419 #endif	/* OPENSSL_INTERFACE */
    420 
    421 #define	ROUNDS32		KEYSIZE32	/* temp: reuses key size reg */
    422 #define	ROUNDS64		KEYSIZE64	/* temp: reuses key size reg */
    423 #define	ENDAESKEY		USERCIPHERKEY	/* temp: reuses user key reg */
    424 
    425 
    426 ENTRY_NP(rijndael_key_setup_enc_intel)
    427 	/* EXPORT DELETE START */
    428 	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)	/* kernel only: %r10 = %cr0 */
    429 
    430 	/ NULL pointer sanity check
    431 	test	%USERCIPHERKEY, %USERCIPHERKEY
    432 	jz	.Lenc_key_invalid_param
    433 	test	%AESKEY, %AESKEY
    434 	jz	.Lenc_key_invalid_param
    435 
    436 	movups	(%USERCIPHERKEY), %xmm0	/ user key (first 16 bytes)
    437 	movaps	%xmm0, (%AESKEY)	/* round key 0 = first 16 key bytes */
    438 	lea	0x10(%AESKEY), %rcx	/ key addr
    439 	pxor	%xmm4, %xmm4		/ xmm4 is assumed 0 in _key_expansion_x
    440 
    441 	cmp	$256, %KEYSIZE32
    442 	jnz	.Lenc_key192		/* not 256 bits: try 192 */
    443 
    444 	/ AES 256: 14 rounds in encryption key schedule
    445 #ifdef OPENSSL_INTERFACE
    446 	mov	$14, %ROUNDS32
    447 	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 14
    448 #endif	/* OPENSSL_INTERFACE */
    449 
    450 	movups	0x10(%USERCIPHERKEY), %xmm2	/ other user key (2nd 16 bytes)
    451 	movaps	%xmm2, (%rcx)		/* round key 1 = second 16 key bytes */
    452 	add	$0x10, %rcx
    453 
    454 	aeskeygenassist $0x1, %xmm2, %xmm1	/ expand the key
    455 	call	_key_expansion_256a
    456 	aeskeygenassist $0x1, %xmm0, %xmm1
    457 	call	_key_expansion_256b
    458 	aeskeygenassist $0x2, %xmm2, %xmm1	/ expand the key
    459 	call	_key_expansion_256a
    460 	aeskeygenassist $0x2, %xmm0, %xmm1
    461 	call	_key_expansion_256b
    462 	aeskeygenassist $0x4, %xmm2, %xmm1	/ expand the key
    463 	call	_key_expansion_256a
    464 	aeskeygenassist $0x4, %xmm0, %xmm1
    465 	call	_key_expansion_256b
    466 	aeskeygenassist $0x8, %xmm2, %xmm1	/ expand the key
    467 	call	_key_expansion_256a
    468 	aeskeygenassist $0x8, %xmm0, %xmm1
    469 	call	_key_expansion_256b
    470 	aeskeygenassist $0x10, %xmm2, %xmm1	/ expand the key
    471 	call	_key_expansion_256a
    472 	aeskeygenassist $0x10, %xmm0, %xmm1
    473 	call	_key_expansion_256b
    474 	aeskeygenassist $0x20, %xmm2, %xmm1	/ expand the key
    475 	call	_key_expansion_256a
    476 	aeskeygenassist $0x20, %xmm0, %xmm1
    477 	call	_key_expansion_256b
    478 	aeskeygenassist $0x40, %xmm2, %xmm1	/ expand the key
    479 	call	_key_expansion_256a
    480 
    481 	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
    482 #ifdef	OPENSSL_INTERFACE
    483 	xor	%rax, %rax			/ return 0 (OK)
    484 #else	/* Open Solaris Interface */
    485 	mov	$14, %rax			/ return # rounds = 14
    486 #endif
    487 	ret
    488 
    489 .align 4
    490 .Lenc_key192:
    491 	cmp	$192, %KEYSIZE32
    492 	jnz	.Lenc_key128		/* not 192 bits: try 128 */
    493 
    494 	/ AES 192: 12 rounds in encryption key schedule
    495 #ifdef OPENSSL_INTERFACE
    496 	mov	$12, %ROUNDS32
    497 	movl	%ROUNDS32, 240(%AESKEY)	/ key.rounds = 12
    498 #endif	/* OPENSSL_INTERFACE */
    499 
    500 	movq	0x10(%USERCIPHERKEY), %xmm2	/ other user key
    501 	aeskeygenassist $0x1, %xmm2, %xmm1	/ expand the key
    502 	call	_key_expansion_192a
    503 	aeskeygenassist $0x2, %xmm2, %xmm1	/ expand the key
    504 	call	_key_expansion_192b
    505 	aeskeygenassist $0x4, %xmm2, %xmm1	/ expand the key
    506 	call	_key_expansion_192a
    507 	aeskeygenassist $0x8, %xmm2, %xmm1	/ expand the key
    508 	call	_key_expansion_192b
    509 	aeskeygenassist $0x10, %xmm2, %xmm1	/ expand the key
    510 	call	_key_expansion_192a
    511 	aeskeygenassist $0x20, %xmm2, %xmm1	/ expand the key
    512 	call	_key_expansion_192b
    513 	aeskeygenassist $0x40, %xmm2, %xmm1	/ expand the key
    514 	call	_key_expansion_192a
    515 	aeskeygenassist $0x80, %xmm2, %xmm1	/ expand the key
    516 	call	_key_expansion_192b
    517 
    518 	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
    519 #ifdef	OPENSSL_INTERFACE
    520 	xor	%rax, %rax			/ return 0 (OK)
    521 #else	/* OpenSolaris Interface */
    522 	mov	$12, %rax			/ return # rounds = 12
    523 #endif
    524 	ret
    525 
    526 .align 4
    527 .Lenc_key128:
    528 	cmp $128, %KEYSIZE32
    529 	jnz .Lenc_key_invalid_key_bits	/* not 128, 192, or 256 bits */
    530 
    531 	/ AES 128: 10 rounds in encryption key schedule
    532 #ifdef OPENSSL_INTERFACE
    533 	mov	$10, %ROUNDS32
    534 	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 10
    535 #endif	/* OPENSSL_INTERFACE */
    536 
    537 	aeskeygenassist $0x1, %xmm0, %xmm1	/ expand the key
    538 	call	_key_expansion_128
    539 	aeskeygenassist $0x2, %xmm0, %xmm1	/ expand the key
    540 	call	_key_expansion_128
    541 	aeskeygenassist $0x4, %xmm0, %xmm1	/ expand the key
    542 	call	_key_expansion_128
    543 	aeskeygenassist $0x8, %xmm0, %xmm1	/ expand the key
    544 	call	_key_expansion_128
    545 	aeskeygenassist $0x10, %xmm0, %xmm1	/ expand the key
    546 	call	_key_expansion_128
    547 	aeskeygenassist $0x20, %xmm0, %xmm1	/ expand the key
    548 	call	_key_expansion_128
    549 	aeskeygenassist $0x40, %xmm0, %xmm1	/ expand the key
    550 	call	_key_expansion_128
    551 	aeskeygenassist $0x80, %xmm0, %xmm1	/ expand the key
    552 	call	_key_expansion_128
    553 	aeskeygenassist $0x1b, %xmm0, %xmm1	/ expand the key
    554 	call	_key_expansion_128
    555 	aeskeygenassist $0x36, %xmm0, %xmm1	/ expand the key
    556 	call	_key_expansion_128
    557 
    558 	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
    559 #ifdef	OPENSSL_INTERFACE
    560 	xor	%rax, %rax			/ return 0 (OK)
    561 #else	/* OpenSolaris Interface */
    562 	mov	$10, %rax			/ return # rounds = 10
    563 #endif
    564 	ret
    565 
    566 .Lenc_key_invalid_param:
    567 #ifdef	OPENSSL_INTERFACE
    568 	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
    569 	mov	$-1, %rax	/ user key or AES key pointer is NULL
    570 	ret
    571 #else
    572 	/* FALLTHROUGH: OpenSolaris returns 0 for both kinds of error */
    573 #endif	/* OPENSSL_INTERFACE */
    574 
    575 .Lenc_key_invalid_key_bits:
    576 	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
    577 #ifdef	OPENSSL_INTERFACE
    578 	mov	$-2, %rax	/ keysize is invalid
    579 #else	/* Open Solaris Interface */
    580 	xor	%rax, %rax	/ a key pointer is NULL or invalid keysize
    581 #endif	/* OPENSSL_INTERFACE */
    582 
    583 	/* EXPORT DELETE END */
    584 	ret
    585 	SET_SIZE(rijndael_key_setup_enc_intel)
    586 
    587 
    588 /*
    589  * rijndael_key_setup_dec_intel()
    590  * Expand the cipher key into the decryption key schedule.
    591  *
    592  * For kernel code, caller is responsible for ensuring kpreempt_disable()
    593  * has been called.  This is because %xmm registers are not saved/restored.
    594  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
    595  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
    596  * on the stack.
    597  *
    598  * OpenSolaris interface:
    599  * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    600  *	uint64_t keyBits);
    601  * Return value is 0 on error, number of rounds on success.
    602  * P1->P2, P2->P3, P3->P1
    603  *
    604  * Original Intel OpenSSL interface:
    605  * int intel_AES_set_decrypt_key(const unsigned char *userKey,
    606  *	const int bits, AES_KEY *key);
    607  * Return value is non-zero on error, 0 on success.
    608  */
    609 ENTRY_NP(rijndael_key_setup_dec_intel)
    610 	/* EXPORT DELETE START */
    611 	/ Generate round keys used for encryption
    612 	call	rijndael_key_setup_enc_intel
    613 	test	%rax, %rax
    614 #ifdef	OPENSSL_INTERFACE
    615 	jnz	.Ldec_key_exit	/ Failed if returned non-0
    616 #else	/* OpenSolaris Interface */
    617 	jz	.Ldec_key_exit	/ Failed if returned 0
    618 #endif	/* OPENSSL_INTERFACE */
    619 
    620 	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)	/* kernel only: %r10 = %cr0 */
    621 
    622 	/*
    623 	 * Convert round keys used for encryption
    624 	 * to a form usable for decryption
    625 	 */
    626 #ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
    627 	mov	%rax, %ROUNDS64		/ set # rounds (10, 12, or 14)
    628 					/ (already set for OpenSSL)
    629 #endif
    630 
    631 	lea	0x10(%AESKEY), %rcx	/ key addr
    632 	shl	$4, %ROUNDS32		/* rounds * 16 = offset of last key */
    633 	add	%AESKEY, %ROUNDS64	/* address of last round key */
    634 	mov	%ROUNDS64, %ENDAESKEY	/* remember it for the aesimc loop */
    635 
    636 .align 4
    637 .Ldec_key_reorder_loop:		/* swap round keys from both ends inward */
    638 	movaps	(%AESKEY), %xmm0
    639 	movaps	(%ROUNDS64), %xmm1
    640 	movaps	%xmm0, (%ROUNDS64)
    641 	movaps	%xmm1, (%AESKEY)
    642 	lea	0x10(%AESKEY), %AESKEY
    643 	lea	-0x10(%ROUNDS64), %ROUNDS64
    644 	cmp	%AESKEY, %ROUNDS64
    645 	ja	.Ldec_key_reorder_loop	/* until the two pointers meet */
    646 
    647 .align 4
    648 .Ldec_key_inv_loop:		/* first and last round keys are skipped */
    649 	movaps	(%rcx), %xmm0
    650 	/ Convert an encryption round key to a form usable for decryption
    651 	/ with the "AES Inverse Mix Columns" instruction
    652 	aesimc	%xmm0, %xmm1
    653 	movaps	%xmm1, (%rcx)
    654 	lea	0x10(%rcx), %rcx
    655 	cmp	%ENDAESKEY, %rcx
    656 	jnz	.Ldec_key_inv_loop
    657 
    658 	SET_TS_OR_POP_XMM0_XMM1(%r10)
    659 
    660 .Ldec_key_exit:
    661 	/ OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
    662 	/ OpenSSL: rax = 0 for OK, or non-zero for error
    663 	/* EXPORT DELETE END */
    664 	ret
    665 	SET_SIZE(rijndael_key_setup_dec_intel)
    666 
    667 
    668 /*
    669  * aes_encrypt_intel()
    670  * Encrypt a single block (in and out can overlap).
    671  *
    672  * For kernel code, caller is responsible for ensuring kpreempt_disable()
    673  * has been called.  This is because %xmm registers are not saved/restored.
    674  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
    675  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
    676  * on the stack.
    677  *
    678  * Temporary register usage:
    679  * %xmm0	State
    680  * %xmm1	Key
    681  *
    682  * Original OpenSolaris Interface:
    683  * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
    684  *	const uint32_t pt[4], uint32_t ct[4])
    685  *
    686  * Original Intel OpenSSL Interface:
    687  * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
    688  *	const AES_KEY *key)
    689  */
    690 
    691 #ifdef	OPENSSL_INTERFACE
    692 #define	aes_encrypt_intel	intel_AES_encrypt
    693 #define	aes_decrypt_intel	intel_AES_decrypt
    694 
    695 #define	INP		rdi	/* P1, 64 bits */
    696 #define	OUTP		rsi	/* P2, 64 bits */
    697 #define	KEYP		rdx	/* P3, 64 bits */
    698 
    699 /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
    700 #define	NROUNDS32	ecx	/* temporary, 32 bits */
    701 #define	NROUNDS		cl	/* temporary,  8 bits */
    702 
    703 #else	/* OpenSolaris Interface */
    704 #define	KEYP		rdi	/* P1, 64 bits */
    705 #define	NROUNDS		esi	/* P2, 32 bits */
    706 #define	INP		rdx	/* P3, 64 bits */
    707 #define	OUTP		rcx	/* P4, 64 bits */
    708 #endif	/* OPENSSL_INTERFACE */
    709 
    710 #define	STATE		xmm0	/* temporary, 128 bits: block being processed */
    711 #define	KEY		xmm1	/* temporary, 128 bits: current round key */
    712 
    713 ENTRY_NP(aes_encrypt_intel)
    714 	/* EXPORT DELETE START */
    715 	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)	/* kernel only: %r10 = %cr0 */
    716 
    717 	movups	(%INP), %STATE			/ input
    718 	movaps	(%KEYP), %KEY			/ key
    719 #ifdef	OPENSSL_INTERFACE
    720 	mov	240(%KEYP), %NROUNDS32		/ round count
    721 #else	/* OpenSolaris Interface */
    722 	/* Round count is already present as P2 in %rsi/%esi */
    723 #endif	/* OPENSSL_INTERFACE */
    724 
    725 	pxor	%KEY, %STATE			/ round 0
    726 	lea	0x30(%KEYP), %KEYP	/* bias KEYP: last key ends up at 0x70 */
    727 	cmp	$12, %NROUNDS
    728 	jb	.Lenc128		/* fewer than 12 rounds: AES 128 */
    729 	lea	0x20(%KEYP), %KEYP	/* lea leaves the cmp flags intact */
    730 	je	.Lenc192		/* exactly 12 rounds: AES 192 */
    731 
    732 	/ AES 256
    733 	lea	0x20(%KEYP), %KEYP
    734 	movaps	-0x60(%KEYP), %KEY
    735 	aesenc	%KEY, %STATE
    736 	movaps	-0x50(%KEYP), %KEY
    737 	aesenc	%KEY, %STATE
    738 
    739 .align 4
    740 .Lenc192:
    741 	/ AES 192 and 256
    742 	movaps	-0x40(%KEYP), %KEY
    743 	aesenc	%KEY, %STATE
    744 	movaps	-0x30(%KEYP), %KEY
    745 	aesenc	%KEY, %STATE
    746 
    747 .align 4
    748 .Lenc128:
    749 	/ AES 128, 192, and 256
    750 	movaps	-0x20(%KEYP), %KEY
    751 	aesenc	%KEY, %STATE
    752 	movaps	-0x10(%KEYP), %KEY
    753 	aesenc	%KEY, %STATE
    754 	movaps	(%KEYP), %KEY
    755 	aesenc	%KEY, %STATE
    756 	movaps	0x10(%KEYP), %KEY
    757 	aesenc	%KEY, %STATE
    758 	movaps	0x20(%KEYP), %KEY
    759 	aesenc	%KEY, %STATE
    760 	movaps	0x30(%KEYP), %KEY
    761 	aesenc	%KEY, %STATE
    762 	movaps	0x40(%KEYP), %KEY
    763 	aesenc	%KEY, %STATE
    764 	movaps	0x50(%KEYP), %KEY
    765 	aesenc	%KEY, %STATE
    766 	movaps	0x60(%KEYP), %KEY
    767 	aesenc	%KEY, %STATE
    768 	movaps	0x70(%KEYP), %KEY
    769 	aesenclast	 %KEY, %STATE		/ last round
    770 	movups	%STATE, (%OUTP)			/ output
    771 
    772 	SET_TS_OR_POP_XMM0_XMM1(%r10)
    773 	/* EXPORT DELETE END */
    774 	ret
    775 	SET_SIZE(aes_encrypt_intel)
    776 
    777 
    778 /*
    779  * aes_decrypt_intel()
    780  * Decrypt a single block (in and out can overlap).
    781  *
    782  * For kernel code, caller is responsible for ensuring kpreempt_disable()
    783  * has been called.  This is because %xmm registers are not saved/restored.
    784  * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
    785  * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
    786  * on the stack.
    787  *
    788  * Temporary register usage:
    789  * %xmm0	State
    790  * %xmm1	Key
    791  *
    792  * Original OpenSolaris Interface:
    793  * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
    794  *	const uint32_t ct[4], uint32_t pt[4])
    795  *
    796  * Original Intel OpenSSL Interface:
    797  * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
    798  *	const AES_KEY *key);
    799  */
    800 ENTRY_NP(aes_decrypt_intel)
    801 	/* EXPORT DELETE START */
    802 	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)
    803 
    804 	movups	(%INP), %STATE			/ input
    805 	movaps	(%KEYP), %KEY			/ key
    806 #ifdef	OPENSSL_INTERFACE
    807 	mov	240(%KEYP), %NROUNDS32		/ round count
    808 #else	/* OpenSolaris Interface */
    809 	/* Round count is already present as P2 in %rsi/%esi */
    810 #endif	/* OPENSSL_INTERFACE */
    811 
    812 	pxor	%KEY, %STATE			/ round 0
    813 	lea	0x30(%KEYP), %KEYP
    814 	cmp	$12, %NROUNDS
    815 	jb	.Ldec128
    816 	lea	0x20(%KEYP), %KEYP
    817 	je	.Ldec192
    818 
    819 	/ AES 256
    820 	lea	0x20(%KEYP), %KEYP
    821 	movaps	-0x60(%KEYP), %KEY
    822 	aesdec	%KEY, %STATE
    823 	movaps	-0x50(%KEYP), %KEY
    824 	aesdec	%KEY, %STATE
    825 
    826 .align 4
    827 .Ldec192:
    828 	/ AES 192 and 256
    829 	movaps	-0x40(%KEYP), %KEY
    830 	aesdec	%KEY, %STATE
    831 	movaps	-0x30(%KEYP), %KEY
    832 	aesdec	%KEY, %STATE
    833 
    834 .align 4
    835 .Ldec128:
    836 	/ AES 128, 192, and 256
    837 	movaps	-0x20(%KEYP), %KEY
    838 	aesdec	%KEY, %STATE
    839 	movaps	-0x10(%KEYP), %KEY
    840 	aesdec	%KEY, %STATE
    841 	movaps	(%KEYP), %KEY
    842 	aesdec	%KEY, %STATE
    843 	movaps	0x10(%KEYP), %KEY
    844 	aesdec	%KEY, %STATE
    845 	movaps	0x20(%KEYP), %KEY
    846 	aesdec	%KEY, %STATE
    847 	movaps	0x30(%KEYP), %KEY
    848 	aesdec	%KEY, %STATE
    849 	movaps	0x40(%KEYP), %KEY
    850 	aesdec	%KEY, %STATE
    851 	movaps	0x50(%KEYP), %KEY
    852 	aesdec	%KEY, %STATE
    853 	movaps	0x60(%KEYP), %KEY
    854 	aesdec	%KEY, %STATE
    855 	movaps	0x70(%KEYP), %KEY
    856 	aesdeclast	%KEY, %STATE		/ last round
    857 	movups	%STATE, (%OUTP)			/ output
    858 
    859 	SET_TS_OR_POP_XMM0_XMM1(%r10)
    860 	ret
    861 	/* EXPORT DELETE END */
    862 	SET_SIZE(aes_decrypt_intel)
    863 
    864 #endif	/* lint || __lint */
    865