Home | History | Annotate | Download | only in arcfour
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #define	ARCFOUR_LOOP_OPTIMIZED
     27 
     28 #ifndef _KERNEL
     29 #include <stdint.h>
     30 #endif	/* _KERNEL */
     31 
     32 #include "arcfour.h"
     33 
     34 #if defined(__amd64)
     35 /* ARCFour_key.flag values */
     36 #define	ARCFOUR_ON_INTEL	1
     37 #define	ARCFOUR_ON_AMD64	0
     38 
     39 #ifdef _KERNEL
     40 #include <sys/x86_archext.h>
     41 #include <sys/cpuvar.h>
     42 
     43 #else
     44 #include <sys/auxv.h>
     45 #endif	/* _KERNEL */
     46 #endif	/* __amd64 */
     47 
     48 #ifndef __amd64
     49 /*
     50  * Initialize the key stream 'key' using the key value.
     51  *
     52  * Input:
     53  * keyval	User-provided key
     54  * keyvallen	Length, in bytes, of keyval
     55  * Output:
     56  * key		Initialized ARCFOUR key schedule, based on keyval
     57  */
     58 void
     59 arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
     60 {
     61 /* EXPORT DELETE START */
     62 
     63 	uchar_t ext_keyval[256];
     64 	uchar_t tmp;
     65 	int i, j;
     66 
     67 	/* Normalize key length to 256 */
     68 	for (i = j = 0; i < 256; i++, j++) {
     69 		if (j == keyvallen)
     70 			j = 0;
     71 		ext_keyval[i] = keyval[j];
     72 	}
     73 
     74 	for (i = 0; i < 256; i++)
     75 		key->arr[i] = (uchar_t)i;
     76 
     77 	j = 0;
     78 	for (i = 0; i < 256; i++) {
     79 		j = (j + key->arr[i] + ext_keyval[i]) & 0xff;
     80 		tmp = key->arr[i];
     81 		key->arr[i] = key->arr[j];
     82 		key->arr[j] = tmp;
     83 	}
     84 	key->i = 0;
     85 	key->j = 0;
     86 
     87 /* EXPORT DELETE END */
     88 }
     89 #endif	/* !__amd64 */
     90 
     91 
     92 /*
     93  * Encipher 'in' using 'key'.
     94  *
     95  * Input:
     96  * key		ARCFOUR key, initialized by arcfour_key_init()
     97  * in		Input text
     98  * out		Buffer to contain output text
     99  * len		Length, in bytes, of the in and out buffers
    100  *
    101  * Output:
    102  * out		Buffer containing output text
    103  *
    104  * Note: in and out can point to the same location
    105  */
    106 void
    107 arcfour_crypt(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
    108 {
    109 /* EXPORT DELETE START */
    110 #ifdef	__amd64
    111 	if (key->flag == ARCFOUR_ON_AMD64) {
    112 		arcfour_crypt_asm(key, in, out, len);
    113 	} else { /* Intel EM64T */
    114 #endif	/* amd64 */
    115 
    116 	size_t		ii;
    117 	uchar_t		i, j, ti, tj;
    118 #ifdef ARCFOUR_LOOP_OPTIMIZED
    119 	uchar_t		arr_ij;
    120 #endif
    121 #ifdef __amd64
    122 	uint32_t	*arr;
    123 #else
    124 	uchar_t		*arr;
    125 #endif
    126 
    127 #ifdef	sun4u
    128 	/*
    129 	 * The sun4u has a version of arcfour_crypt_aligned() hand-tuned for
    130 	 * the cases where the input and output buffers are aligned on
    131 	 * a multiple of 8-byte boundary.
    132 	 */
    133 	int		index;
    134 	uchar_t		tmp;
    135 
    136 	index = (((uint64_t)(uintptr_t)in) & 0x7);
    137 
    138 	/* Get the 'in' on an 8-byte alignment */
    139 	if (index > 0) {
    140 		i = key->i;
    141 		j = key->j;
    142 		for (index = 8 - (uint64_t)(uintptr_t)in & 0x7;
    143 		    (index-- > 0) && len > 0;
    144 		    len--, in++, out++) {
    145 			++i;
    146 			j = j + key->arr[i];
    147 			tmp = key->arr[i];
    148 			key->arr[i] = key->arr[j];
    149 			key->arr[j] = tmp;
    150 			tmp = key->arr[i] + key->arr[j];
    151 			*out = *in ^ key->arr[tmp];
    152 		}
    153 		key->i = i;
    154 		key->j = j;
    155 	}
    156 
    157 	if (len == 0)
    158 		return;
    159 
    160 	/* See if we're fortunate and 'out' got aligned as well */
    161 
    162 	if ((((uint64_t)(uintptr_t)out) & 7) != 0) {
    163 #endif	/* sun4u */
    164 
    165 	i = key->i;
    166 	j = key->j;
    167 	arr = key->arr;
    168 
    169 #ifndef ARCFOUR_LOOP_OPTIMIZED
    170 	/*
    171 	 * This loop is hasn't been reordered, but is kept for reference
    172 	 * purposes as it's more readable
    173 	 */
    174 	for (ii = 0; ii < len; ++ii) {
    175 		++i;
    176 		ti = arr[i];
    177 		j = j + ti;
    178 		tj = arr[j];
    179 		arr[j] = ti;
    180 		arr[i] = tj;
    181 		out[ii] = in[ii] ^ arr[(ti + tj) & 0xff];
    182 	}
    183 
    184 #else
    185 	/*
    186 	 * This for loop is optimized by carefully spreading out
    187 	 * memory access and storage to avoid conflicts,
    188 	 * allowing the processor to process operations in parallel
    189 	 */
    190 
    191 	/* for loop setup */
    192 	++i;
    193 	ti = arr[i];
    194 	j = j + ti;
    195 	tj = arr[j];
    196 	arr[j] = ti;
    197 	arr[i] = tj;
    198 	arr_ij = arr[(ti + tj) & 0xff];
    199 	--len;
    200 
    201 	for (ii = 0; ii < len; ) {
    202 		++i;
    203 		ti = arr[i];
    204 		j = j + ti;
    205 		tj = arr[j];
    206 		arr[j] = ti;
    207 		arr[i] = tj;
    208 
    209 		/* save result from previous loop: */
    210 		out[ii] = in[ii] ^ arr_ij;
    211 
    212 		++ii;
    213 		arr_ij = arr[(ti + tj) & 0xff];
    214 	}
    215 	/* save result from last loop: */
    216 	out[ii] = in[ii] ^ arr_ij;
    217 #endif
    218 
    219 	key->i = i;
    220 	key->j = j;
    221 
    222 #ifdef	sun4u
    223 	} else {
    224 		arcfour_crypt_aligned(key, len, in, out);
    225 	}
    226 #endif	/* sun4u */
    227 #ifdef	__amd64
    228 	}
    229 #endif	/* amd64 */
    230 
    231 /* EXPORT DELETE END */
    232 }
    233 
    234 
    235 #ifdef	__amd64
    236 /*
    237  * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
    238  * Cache the result, as the CPU can't change.
    239  *
    240  * Note: the userland version uses getisax() and checks for an AMD-64-only
    241  * feature.  The kernel version uses cpuid_getvendor().
    242  */
    243 int
    244 arcfour_crypt_on_intel(void)
    245 {
    246 	static int	cached_result = -1;
    247 
    248 	if (cached_result == -1) { /* first time */
    249 #ifdef _KERNEL
    250 		cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
    251 #else
    252 		uint_t	ui;
    253 
    254 		(void) getisax(&ui, 1);
    255 		cached_result = ((ui & AV_386_AMD_MMX) == 0);
    256 #endif	/* _KERNEL */
    257 	}
    258 
    259 	return (cached_result);
    260 }
    261 #endif	/* __amd64 */
    262