Home | History | Annotate | Download | only in gen
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 	.file	"memcmp.s"
     28 
     29 /*
     30  * memcmp(s1, s2, len)
     31  *
     32  * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
     33  *
     34  * Fast assembler language version of the following C-program for memcmp
     35  * which represents the `standard' for the C-library.
     36  *
     37  *	int
     38  *	memcmp(const void *s1, const void *s2, size_t n)
     39  *	{
     40  *		if (s1 != s2 && n != 0) {
     41  *			const unsigned char *ps1 = s1;
     42  *			const unsigned char *ps2 = s2;
     43  *			do {
     44  *				if (*ps1++ != *ps2++)
     45  *					return (ps1[-1] - ps2[-1]);
     46  *			} while (--n != 0);
     47  *		}
     48  *		return (0);
     49  *	}
     50  */
     51 
     52 #include <sys/asm_linkage.h>
     53 
     54 	ANSI_PRAGMA_WEAK(memcmp,function)
     55 
        	/*
        	 * Register roles, as used by the code below:
        	 *	%o0	s1 cursor on entry; receives the result on return
        	 *	%o1	s2 cursor; rewritten as the offset (s2 - s1) before
        	 *		the byte loop and before each word loop, so that
        	 *		[%o0 + %o1] addresses s2 while only %o0 advances
        	 *	%o2	byte count; from .iss2 on, only the bytes left for
        	 *		the trailing byte loop
        	 *	%o3	scratch, then the bytes still to compare as words
        	 *	%o4/%o5	the s1 / s2 datum currently being compared
        	 *	%g1/%g5	scratch for merging misaligned s2 words
        	 *
        	 * Counts of 17 or fewer go straight to the byte loop.  Otherwise
        	 * s1 is brought to a word boundary, whole words are compared
        	 * (s2 words are rebuilt by shift-and-merge when s2 is not word
        	 * aligned), and the byte loop finishes the tail.
        	 *
        	 * Many branches below rely on the instruction in their delay
        	 * slot and on condition codes set several instructions earlier;
        	 * the order of instructions is significant.
        	 */
     56 	ENTRY(memcmp)
        	/*
        	 * Identical pointers compare equal.  The delay slot of the be
        	 * already compares the count with 17 for the bleu that follows.
        	 */
     57 	cmp	%o0, %o1		! s1 == s2?
     58 	be,pn	%xcc, .cmpeq
     59 	cmp	%o2, 17
        	/*
        	 * Annulled delay slot: %o1 becomes (s2 - s1) only when the
        	 * branch to the byte path is taken.
        	 */
     60 	bleu,a,pn %xcc, .cmpbyt		! for small counts go do bytes
     61 	sub	%o1, %o0, %o1
     62 
        	/*
        	 * %o3 = s1 & 3.  If zero, skip to .iss2; the annulled delay slot
        	 * then puts s2 & 3 in %o4 and sets the condition codes that
        	 * .iss2 branches on.
        	 */
     63 	andcc	%o0, 3, %o3		! is s1 aligned?
     64 	bz,a,pn	%icc, .iss2		! if so go check s2
     65 	andcc	%o1, 3, %o4		! is s2 aligned?
        	/*
        	 * s1 & 3 == 2 needs only the halfword step.  The delay-slot
        	 * compare with 3 is consumed by the first be in .algn1.
        	 */
     66 	cmp	%o3, 2
     67 	be,pn	%icc, .algn2
     68 	cmp	%o3, 3
     69 
        	/*
        	 * s1 & 3 is 1 or 3: compare one byte.  inc/dec do not write the
        	 * condition codes, so the first be still tests (s1 & 3) == 3.
        	 * In that case s1 is now word aligned and .algn3 acts on the
        	 * byte compare performed in the delay slot.  Otherwise an equal
        	 * byte leads on to the halfword step and an unequal one returns.
        	 */
     70 .algn1:	ldub	[%o0], %o4		! cmp one byte
     71 	inc	%o0
     72 	ldub	[%o1], %o5
     73 	inc	%o1
     74 	dec	%o2
     75 	be,pn	%icc, .algn3
     76 	cmp	%o4, %o5
     77 	be,pt	%icc, .algn2
     78 	nop
     79 	b,a	.noteq
     80 
        	/*
        	 * s1 is halfword aligned: fetch two s1 bytes with one load and
        	 * compare them one at a time against single s2 bytes.  %o3 gets
        	 * the high-order byte of the halfword, which is compared first.
        	 */
     81 .algn2:	lduh	[%o0], %o4
     82 	inc	2, %o0
     83 	ldub	[%o1], %o5
     84 	inc	1, %o1
     85 	srl	%o4, 8, %o3
     86 	cmp	%o3, %o5
     87 	be,a,pt	%icc, 1f
     88 	ldub	[%o1], %o5		! delay slot, get next byte from s2
     89 	b	.noteq
     90 	mov	%o3, %o4		! delay slot, move *s1 to %o4
        	/*
        	 * First byte matched: account for both bytes, then compare the
        	 * low-order byte of the halfword with the second s2 byte.
        	 */
     91 1:	inc	%o1
     92 	dec	2, %o2
     93 	and	%o4, 0xff, %o4
     94 	cmp	%o4, %o5
        	/*
        	 * Common exit of the alignment steps: if the last byte pair
        	 * matched, continue at .iss2 with s2 & 3 computed in the
        	 * annulled delay slot; otherwise return the difference.
        	 */
     95 .algn3:	be,a,pt	%icc, .iss2
     96 	andcc	%o1, 3, %o4		! delay slot, is s2 aligned?
     97 	b,a	.noteq
     98 
        	/*
        	 * Byte loop.  Expects %o1 = s2 - s1 and %o2 = bytes left.
        	 * Every entry to .bytcmp executes deccc %o2 in the delay slot
        	 * of the branch that gets there.  While that decrement does not
        	 * borrow (bgeu), the annulled delay slot loads the next s1 byte
        	 * and the body at 1: loads the matching s2 byte and compares.
        	 * Once it borrows, the count is exhausted and control falls
        	 * through to .cmpeq.
        	 */
     99 .cmpbyt:b	.bytcmp
    100 	deccc	%o2
    101 1:	ldub	[%o0 + %o1], %o5	! byte compare loop
    102 	inc	%o0
    103 	cmp	%o4, %o5
    104 	be,a,pt	%icc, .bytcmp
    105 	deccc	%o2			! delay slot, compare count (len)
    106 	b,a	.noteq
    107 .bytcmp:bgeu,a,pt %xcc, 1b
    108 	ldub	[%o0], %o4
    109 .cmpeq:
    110 	retl				! strings compare equal
    111 	clr	%o0
    112 
        	/*
        	 * %o4 and %o5 hold an s1 word and an s2 word that differ.
        	 * Walk both from the most significant byte of the low 32 bits
        	 * downward: srl by 24 isolates the current byte into %o1 / %o2
        	 * (reused here as scratch), sll by 8 exposes the next one.
        	 * The first three byte pairs are tested; if all three match,
        	 * the fourth pair is used without a test.  The result is the
        	 * difference of the pair left in %o1 / %o2.
        	 *
        	 * The sll in each bne delay slot also executes when the branch
        	 * is taken; by then %o1 / %o2 already hold the bytes used at 1:.
        	 */
    113 .noteq_word:				! words aren't equal. find unequal byte
    114 	srl	%o4, 24, %o1		! first byte
    115 	srl	%o5, 24, %o2
    116 	cmp	%o1, %o2
    117 	bne,pn	%icc, 1f
    118 	sll	%o4, 8, %o4
    119 	sll	%o5, 8, %o5
    120 	srl	%o4, 24, %o1
    121 	srl	%o5, 24, %o2
    122 	cmp	%o1, %o2
    123 	bne,pn	%icc, 1f
    124 	sll	%o4, 8, %o4
    125 	sll	%o5, 8, %o5
    126 	srl	%o4, 24, %o1
    127 	srl	%o5, 24, %o2
    128 	cmp	%o1, %o2
    129 	bne,pn	%icc, 1f
    130 	sll	%o4, 8, %o4
    131 	sll	%o5, 8, %o5
    132 	srl	%o4, 24, %o1
    133 	srl	%o5, 24, %o2
    134 1:
    135 	retl
    136 	sub	%o1, %o2, %o0		! delay slot
    137 
        	/*
        	 * Byte-level mismatch: %o4 holds the s1 byte and %o5 the s2
        	 * byte; the result is computed in the delay slot of retl.
        	 */
    138 .noteq:
    139 	retl				! strings aren't equal
    140 	sub	%o4, %o5, %o0		! delay slot, return(*s1 - *s2)
    141 
        	/*
        	 * s1 is word aligned.  Every branch to .iss2 executes
        	 * andcc %o1, 3, %o4 in its delay slot, so %o4 and the condition
        	 * codes describe s2 & 3; the andn/and below do not write the
        	 * condition codes.  Split the count into %o3 (bytes to compare
        	 * as whole words) and %o2 (0..3 tail bytes), then dispatch on
        	 * the alignment of s2.  Each branch's delay slot prepares the
        	 * compare for the next branch.
        	 */
    142 .iss2:	andn	%o2, 3, %o3		! count of aligned bytes
    143 	and	%o2, 3, %o2		! remaining bytes
    144 	bz,pn	%icc, .w4cmp		! if s2 word aligned, compare words
    145 	cmp	%o4, 2
    146 	be,pn	%icc, .w2cmp		! s2 half aligned
    147 	cmp	%o4, 1
    148 
        	/*
        	 * s2 & 3 is 1 or 3.  One word's worth of bytes is moved from
        	 * the word count to the tail count, and the first s2 byte is
        	 * read and placed in bits 31..24 of %o5 (delay slot, executed
        	 * on both paths).  The be tests the cmp %o4, 1 issued above:
        	 * s2 & 3 == 1 continues at .w1cmp.
        	 */
    149 .w3cmp:
    150 	dec	4, %o3			! avoid reading beyond the last byte
    151 	inc	4, %o2
    152 	ldub	[%o1], %g1		! read a byte to align for word reads
    153 	inc	1, %o1
    154 	be,pt	%icc, .w1cmp		! aligned to 1 or 3 bytes
    155 	sll	%g1, 24, %o5
    156 
        	/*
        	 * s2 & 3 == 3: s2 is now word aligned with one byte carried in
        	 * the top byte of %o5.  Per iteration the new s2 word shifted
        	 * right by 8 supplies the remaining three bytes, and its low
        	 * byte, shifted to the top in the bnz delay slot, becomes the
        	 * carry for the next iteration.  Only the low 32 bits (%icc)
        	 * take part in the word compare.
        	 */
    157 	sub	%o1, %o0, %o1
    158 2:	lduw	[%o0 + %o1], %g1
    159 	lduw	[%o0], %o4
    160 	inc	4, %o0
    161 	srl	%g1, 8, %g5		! merge with the other half
    162 	or	%g5, %o5, %o5
    163 	cmp	%o4, %o5
    164 	bne,pt	%icc, .noteq_word
    165 	deccc	4, %o3
    166 	bnz,pt	%xcc, 2b
    167 	sll	%g1, 24, %o5
        	/*
        	 * [%o0 + %o1] ran one byte ahead of the compared position in
        	 * s2; back the offset up so the byte loop resumes at the
        	 * carried byte.
        	 */
    168 	sub	%o1, 1, %o1		! used 3 bytes of the last word read
    169 	b	.bytcmp
    170 	deccc	%o2
    171 
        	/*
        	 * s2 & 3 == 1.  Reached from the be in .w3cmp, so the first s2
        	 * byte is already in the top byte of %o5 and one word has
        	 * already been moved to the tail count; a second word is moved
        	 * here.  A halfword read brings s2 to a word boundary, leaving
        	 * three bytes carried in bits 31..8 of %o5.  Per iteration the
        	 * top byte of the new s2 word completes the word, and its low
        	 * three bytes, shifted left by 8, become the next carry.
        	 */
    172 .w1cmp:
    173 	dec	4, %o3			! avoid reading beyond the last byte
    174 	inc	4, %o2
    175 	lduh	[%o1], %g1		! read 3 bytes to word align
    176 	inc	2, %o1
    177 	sll	%g1, 8, %g5
    178 	or	%o5, %g5, %o5
    179 
    180 	sub	%o1, %o0, %o1
    181 3:	lduw	[%o0 + %o1], %g1
    182 	lduw	[%o0], %o4
    183 	inc	4, %o0
    184 	srl	%g1, 24, %g5		! merge with the other half
    185 	or	%g5, %o5, %o5
    186 	cmp	%o4, %o5
    187 	bne,pt	%icc, .noteq_word
    188 	deccc	4, %o3
    189 	bnz,pt	%xcc, 3b
    190 	sll	%g1, 8, %o5
        	/*
        	 * [%o0 + %o1] ran three bytes ahead of the compared position
        	 * in s2; undo that before entering the byte loop.
        	 */
    191 	sub	%o1, 3, %o1		! used 1 byte of the last word read
    192 	b	.bytcmp
    193 	deccc	%o2
    194 
        	/*
        	 * s2 & 3 == 2.  One word is moved from the word count to the
        	 * tail count, and a halfword read brings s2 to a word boundary
        	 * with two bytes carried in the upper half of %o5.  Per
        	 * iteration the upper half of the new s2 word completes the
        	 * word, and its lower half, shifted left by 16, becomes the
        	 * next carry.
        	 */
    195 .w2cmp:
    196 	dec	4, %o3			! avoid reading beyond the last byte
    197 	inc	4, %o2
    198 	lduh	[%o1], %g1		! read a halfword to align s2
    199 	inc	2, %o1
    200 	sll	%g1, 16, %o5
    201 	sub	%o1, %o0, %o1
    202 4:	lduw	[%o0 + %o1], %g1	! read a word from s2
    203 	lduw	[%o0], %o4		! read a word from s1
    204 	inc	4, %o0
    205 	srl	%g1, 16, %g5		! merge with the other half
    206 	or	%g5, %o5, %o5
    207 	cmp	%o4, %o5
    208 	bne,pn	%icc, .noteq_word
    209 	deccc	4, %o3
    210 	bnz,pt	%xcc, 4b
    211 	sll	%g1, 16, %o5
        	/*
        	 * [%o0 + %o1] ran two bytes ahead of the compared position in
        	 * s2; undo that before entering the byte loop.
        	 */
    212 	sub	%o1, 2, %o1		! only used half of the last read word
    213 	b	.bytcmp
    214 	deccc	%o2
    215 
        	/*
        	 * s1 and s2 are both word aligned.  The s2 word is loaded one
        	 * step ahead: once before the loop, then in the annulled delay
        	 * slot of the bnz, i.e. only when another iteration follows.
        	 * The deccc in the bne delay slot executes on both paths.
        	 */
    216 .w4cmp:
    217 	sub	%o1, %o0, %o1
    218 	lduw	[%o0 + %o1], %o5
    219 5:	lduw	[%o0], %o4
    220 	inc	4, %o0
    221 	cmp	%o4, %o5
    222 	bne,pt	%icc, .noteq_word
    223 	deccc	4, %o3
    224 	bnz,a,pt %xcc, 5b
    225 	lduw	[%o0 + %o1], %o5
    226 	b	.bytcmp			! compare remaining bytes, if any
    227 	deccc	%o2
    228 
    229 	SET_SIZE(memcmp)
    230