Home | History | Annotate | Download | only in md5
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef	_MD5_BYTESWAP_H
     28 #define	_MD5_BYTESWAP_H
     29 
     30 /*
     31  * definitions for inline functions for little-endian loads.
     32  *
     33  * This file has special definitions for UltraSPARC architectures,
     34  * which have a special address space identifier for loading 32 and 16 bit
     35  * integers in little-endian byte order.
     36  *
     37  * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
     38  * same thing and must be changed together.
     39  */
     40 
     41 #include <sys/types.h>
     42 #if defined(__sparc)
     43 #include <v9/sys/asi.h>
     44 #elif defined(_LITTLE_ENDIAN)
     45 #include <sys/byteorder.h>
     46 #endif
     47 
     48 #ifdef	__cplusplus
     49 extern "C" {
     50 #endif
     51 
     52 #if defined(_LITTLE_ENDIAN)
     53 
/*
 * Little-endian optimization:  no byte swapping is needed.  On some
 * little-endian machines alignment checks are still required; those are
 * requested below via _MD5_CHECK_ALIGNMENT.
 */
     59 
#if !defined(__i386) && !defined(__amd64)
/*
 * The symbol _MD5_CHECK_ALIGNMENT indicates that the MD5Transform function
 * requires alignment checking.  i386 and amd64 don't require aligned 4-byte
 * loads, so the symbol is defined only for the other little-endian targets.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif /* !__i386 && !__amd64 */
     68 
/*
 * Little-endian hosts: read the 32-bit word at addr directly, with no byte
 * swapping.  addr is converted to uint32_t * by way of void *.  This is a
 * plain 4-byte load; see _MD5_CHECK_ALIGNMENT for the targets on which the
 * address has to be checked for alignment.
 */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))
     70 
     71 #else	/* !_LITTLE_ENDIAN */
     72 
     73 /*
     74  * sparc v9/v8plus optimization:
     75  *
     76  * on the sparc v9/v8plus, we can load data little endian.  however, since
     77  * the compiler doesn't have direct support for little endian, we
     78  * link to an assembly-language routine `load_little_32' to do
     79  * the magic.  note that special care must be taken to ensure the
     80  * address is 32-bit aligned -- in the interest of speed, we don't
     81  * check to make sure, since careful programming can guarantee this
     82  * for us.
     83  */
     84 #if defined(sun4u)
     85 
/*
 * sun4u: LOAD_LITTLE_32 is a single 4-byte little-endian load performed by
 * load_little_32(), so define _MD5_CHECK_ALIGNMENT to request alignment
 * checking.  addr is converted to uint32_t * by way of void *.
 */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)    load_little_32((uint32_t *)(void *)(addr))
     89 
     90 #if !defined(__lint) && defined(__GNUC__)
     91 
     92 static __inline__ uint32_t
     93 load_little_32(uint32_t *addr)
     94 {
     95 	uint32_t value;
     96 
     97 	__asm__(
     98 	    "lduwa	[%1] %2, %0\n\t"
     99 	    : "=r" (value)
    100 	    : "r" (addr), "i" (ASI_PL));
    101 
    102 	return (value);
    103 }
    104 #endif	/* !__lint && __GNUC__ */
    105 
    106 #if !defined(__GNUC__)
/*
 * Non-GCC compilers do not get the inline definition; load_little_32() is
 * only declared here.
 * NOTE(review): presumably it is supplied by the byteswap.il inline template
 * named in the comment at the top of this file -- confirm.
 */
extern	uint32_t load_little_32(uint32_t *);
    108 #endif	/* !__GNUC__ */
    109 
    110 /* Placate lint */
    111 #if defined(__lint)
    112 uint32_t
    113 load_little_32(uint32_t *addr)
    114 {
    115 	return (*addr);
    116 }
    117 #endif	/* __lint */
    118 
#elif defined(_LITTLE_ENDIAN)
/*
 * NOTE(review): this branch sits inside the "#else" arm of the enclosing
 * "#if defined(_LITTLE_ENDIAN)", so it can never be selected.  As written
 * it would also hand the address itself, not the word stored there, to
 * htonl().
 */
#define	LOAD_LITTLE_32(addr)	htonl(addr)
    121 
    122 #else
    123 /* big endian -- will work on little endian, but slowly */
    124 /* Since we do byte operations, we don't have to check for alignment. */
    125 #define	LOAD_LITTLE_32(addr)	\
    126 	((addr)[0] | ((addr)[1] << 8) | ((addr)[2] << 16) | ((addr)[3] << 24))
    127 #endif	/* sun4u */
    128 
    129 #if defined(sun4v)
    130 
/*
 * For N1 we want to minimize the number of arithmetic operations.  This is
 * best achieved by using the %asi register to specify the ASI for the lduwa
 * operations.  Also, there is a separate inline template for each word, so
 * that the immediate offset in lduwa can be used without relying on the
 * compiler to do the right thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 */
    140 #define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(addr))
    141 #define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(addr))
    142 #define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(addr))
    143 #define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(addr))
    144 #define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(addr))
    145 #define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(addr))
    146 #define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(addr))
    147 #define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(addr))
    148 #define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(addr))
    149 #define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(addr))
    150 #define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(addr))
    151 #define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(addr))
    152 #define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(addr))
    153 #define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(addr))
    154 #define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(addr))
    155 #define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(addr))
    156 
    157 #if !defined(__lint) && defined(__GNUC__)
    158 
/*
 * Write asi into the %asi register.
 *
 * This actually sets the ASI register to whatever value is passed, not
 * necessarily to ASI_PL.
 *
 * NOTE(review): the non-GCC prototype of set_little() in this file takes a
 * uint32_t, while this definition takes a uint8_t.
 */
static __inline__ void
set_little(uint8_t asi)
{
	/* volatile: the asm has no outputs and exists only for its effect */
	__asm__ __volatile__(
	    "wr	%%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}
    170 
/*
 * Read the %asi register and return its current value.
 *
 * NOTE(review): presumably used to save %asi before set_little() changes it
 * so that it can be restored afterwards -- confirm against the caller.  The
 * non-GCC prototype of get_little() in this file returns a uint32_t, while
 * this definition returns a uint8_t.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd	%%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}
    182 
    183 /*
    184  * We have 16 functions which differ only in the offset from which they
    185  * load.  Use this preprocessor template to simplify maintenance.  Its
    186  * argument is the offset in hex, without the 0x.
    187  */
    188 #define	LL_TEMPLATE(__off)			\
    189 static __inline__ uint32_t			\
    190 load_little_32_##__off(uint32_t *addr)		\
    191 {						\
    192 	uint32_t value;				\
    193 	__asm__(				\
    194 		"lduwa	[%1 + %2]%%asi, %0\n\t"	\
    195 	: "=r" (value)				\
    196 	: "r" (addr), "i" ((0x##__off) << 2));	\
    197 	return (value);				\
    198 }
    199 
    200 LL_TEMPLATE(0)
    201 LL_TEMPLATE(1)
    202 LL_TEMPLATE(2)
    203 LL_TEMPLATE(3)
    204 LL_TEMPLATE(4)
    205 LL_TEMPLATE(5)
    206 LL_TEMPLATE(6)
    207 LL_TEMPLATE(7)
    208 LL_TEMPLATE(8)
    209 LL_TEMPLATE(9)
    210 LL_TEMPLATE(a)
    211 LL_TEMPLATE(b)
    212 LL_TEMPLATE(c)
    213 LL_TEMPLATE(d)
    214 LL_TEMPLATE(e)
    215 LL_TEMPLATE(f)
    216 #undef	LL_TEMPLATE
    217 
    218 #endif	/* !__lint && __GNUC__ */
    219 
    220 #if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - the register
 * is set using an inline template.
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 *
 * Non-GCC compilers only see these declarations; the definitions are not in
 * this header.
 * NOTE(review): set_little() and get_little() are declared here with
 * uint32_t, while the GCC inline versions in this file use uint8_t.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

/* One loader per word index, 0 through f. */
extern	uint32_t load_little_32_0(uint32_t *);
extern	uint32_t load_little_32_1(uint32_t *);
extern	uint32_t load_little_32_2(uint32_t *);
extern	uint32_t load_little_32_3(uint32_t *);
extern	uint32_t load_little_32_4(uint32_t *);
extern	uint32_t load_little_32_5(uint32_t *);
extern	uint32_t load_little_32_6(uint32_t *);
extern	uint32_t load_little_32_7(uint32_t *);
extern	uint32_t load_little_32_8(uint32_t *);
extern	uint32_t load_little_32_9(uint32_t *);
extern	uint32_t load_little_32_a(uint32_t *);
extern	uint32_t load_little_32_b(uint32_t *);
extern	uint32_t load_little_32_c(uint32_t *);
extern	uint32_t load_little_32_d(uint32_t *);
extern	uint32_t load_little_32_e(uint32_t *);
extern	uint32_t load_little_32_f(uint32_t *);
    247 #endif	/* !__GNUC__ */
    248 #endif	/* sun4v */
    249 
    250 #endif	/* _LITTLE_ENDIAN */
    251 
    252 #ifdef	__cplusplus
    253 }
    254 #endif
    255 
    256 #endif	/* !_MD5_BYTESWAP_H */
    257