1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 .file "strcasecmp.s" 28 29 /* 30 * The strcasecmp() function is a case insensitive versions of strcmp(). 31 * It assumes the ASCII character set and ignores differences in case 32 * when comparing lower and upper case characters. In other words, it 33 * behaves as if both strings had been converted to lower case using 34 * tolower() in the "C" locale on each byte, and the results had then 35 * been compared using strcmp(). 36 * 37 * The assembly code below is an optimized version of the following C 38 * reference: 39 * 40 * static const char charmap[] = { 41 * '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', 42 * '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', 43 * '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', 44 * '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', 45 * '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', 46 * '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', 47 * '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', 48 * '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', 49 * '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 50 * '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 51 * '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 52 * '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', 53 * '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', 54 * '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', 55 * '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', 56 * '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', 57 * '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', 58 * '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', 59 * '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', 60 * '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', 61 * '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', 62 * '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', 63 * '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', 64 * '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', 65 * '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', 66 * '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', 67 * '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', 68 * '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', 69 * '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', 70 * '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', 71 * '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', 72 * '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', 73 * }; 74 * 75 * int 76 * strcasecmp(const char *s1, const char *s2) 77 * { 78 * const unsigned char *cm = (const unsigned char *)charmap; 79 * const unsigned char *us1 = (const unsigned char *)s1; 80 * const unsigned char *us2 = (const unsigned char *)s2; 81 * 82 * while (cm[*us1] == cm[*us2++]) 83 * if (*us1++ == '\0') 84 * return (0); 85 * return (cm[*us1] - cm[*(us2 - 1)]); 86 * } 87 * 88 * The following algorithm, from a 1987 news posting by Alan Mycroft, is 89 * used for finding null bytes in a word: 90 * 91 * #define has_null(word) ((word - 0x01010101) & (~word & 0x80808080)) 92 * 93 * The following algorithm is used for a wordwise tolower() operation: 94 * 95 * unsigned int 96 * parallel_tolower (unsigned int x) 97 * { 98 * unsigned int p; 99 * unsigned int q; 100 * 101 * unsigned int m1 = 0x80808080; 102 * unsigned int m2 = 0x3f3f3f3f; 103 * unsigned int m3 = 0x25252525; 104 * 105 * q = x & ~m1;// newb = byte & 0x7F 106 * p = q + m2; // newb > 0x5A --> MSB set 107 * q = q + m3; // newb < 0x41 --> MSB clear 108 * p = p & ~q; // newb > 0x40 && newb < 0x5B --> MSB set 109 * q = m1 & ~x;// byte < 0x80 --> 0x80 110 * q = p & q; // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x80,else 0 111 * q = q >> 2; // newb > 0x40 && newb < 0x5B && byte < 0x80 -> 0x20,else 0 112 * return (x + q); // translate uppercase characters to lowercase 113 * } 114 * 115 * Both algorithms have been tested exhaustively for all possible 2^32 inputs. 116 */ 117 118 #include <sys/asm_linkage.h> 119 120 ! The first part of this algorithm walks through the beginning of 121 ! both strings a byte at a time until the source ptr is aligned to 122 ! a word boundary. During these steps, the bytes are translated to 123 ! lower-case if they are upper-case, and are checked against 124 ! the source string. 125 126 ENTRY(strcasecmp) 127 128 .align 32 129 130 save %sp, -SA(WINDOWSIZE), %sp 131 subcc %i0, %i1, %i2 ! s1 == s2 ? 132 bz,pn %ncc, .stringsequal ! yup, done, strings equal 133 andcc %i0, 3, %i3 ! s1 word-aligned ? 134 bz,pn %ncc, .s1aligned1 ! yup 135 sethi %hi(0x80808080), %i4 ! start loading Mycroft's magic1 136 137 ldub [%i1 + %i2], %i0 ! s1[0] 138 ldub [%i1], %g1 ! s2[0] 139 sub %i0, 'A', %l0 ! transform for faster uppercase check 140 sub %g1, 'A', %l1 ! transform for faster uppercase check 141 cmp %l0, ('Z' - 'A') ! s1[0] uppercase? 142 bleu,a .noxlate11 ! yes 143 add %i0, ('a' - 'A'), %i0 ! s1[0] = tolower(s1[0]) 144 .noxlate11: 145 cmp %l1, ('Z' - 'A') ! s2[0] uppercase? 146 bleu,a .noxlate12 ! yes 147 add %g1, ('a' - 'A'), %g1 ! s2[0] = tolower(s2[0]) 148 .noxlate12: 149 subcc %i0, %g1, %i0 ! tolower(s1[0]) != tolower(s2[0]) ? 150 bne,pn %ncc, .done ! yup, done 151 inc %i1 ! s1++, s2++ 152 addcc %i0, %g1, %i0 ! s1[0] == 0 ? 153 bz,pn %ncc, .done ! yup, done, strings equal 154 cmp %i3, 3 ! s1 aligned now? 155 bz %ncc, .s1aligned2 ! yup 156 sethi %hi(0x01010101), %i5 ! start loading Mycroft's magic2 157 158 ldub [%i1 + %i2], %i0 ! s1[1] 159 ldub [%i1], %g1 ! s2[1] 160 sub %i0, 'A', %l0 ! transform for faster uppercase check 161 sub %g1, 'A', %l1 ! transform for faster uppercase check 162 cmp %l0, ('Z' - 'A') ! s1[1] uppercase? 163 bleu,a .noxlate21 ! yes 164 add %i0, ('a' - 'A'), %i0 ! s1[1] = tolower(s1[1]) 165 .noxlate21: 166 cmp %l1, ('Z' - 'A') ! s2[1] uppercase? 167 bleu,a .noxlate22 ! yes 168 add %g1, ('a' - 'A'), %g1 ! s2[1] = tolower(s2[1]) 169 .noxlate22: 170 subcc %i0, %g1, %i0 ! tolower(s1[1]) != tolower(s2[1]) ? 171 bne,pn %ncc, .done ! yup, done 172 inc %i1 ! s1++, s2++ 173 addcc %i0, %g1, %i0 ! s1[1] == 0 ? 174 bz,pn %ncc, .done ! yup, done, strings equal 175 cmp %i3, 2 ! s1 aligned now? 176 bz %ncc, .s1aligned3 ! yup 177 or %i4, %lo(0x80808080),%i4! finish loading Mycroft's magic1 178 179 ldub [%i1 + %i2], %i0 ! s1[2] 180 ldub [%i1], %g1 ! s2[2] 181 sub %i0, 'A', %l0 ! transform for faster uppercase check 182 sub %g1, 'A', %l1 ! transform for faster uppercase check 183 cmp %l0, ('Z' - 'A') ! s1[2] uppercase? 184 bleu,a .noxlate31 ! yes 185 add %i0, ('a' - 'A'), %i0 ! s1[2] = tolower(s1[2]) 186 .noxlate31: 187 cmp %l1, ('Z' - 'A') ! s2[2] uppercase? 188 bleu,a .noxlate32 ! yes 189 add %g1, ('a' - 'A'), %g1 ! s2[2] = tolower(s2[2]) 190 .noxlate32: 191 subcc %i0, %g1, %i0 ! tolower(s1[2]) != tolower(s2[2]) ? 192 bne,pn %ncc, .done ! yup, done 193 inc %i1 ! s1++, s2++ 194 addcc %i0, %g1, %i0 ! s1[2] == 0 ? 195 bz,pn %ncc, .done ! yup, done, strings equal 196 or %i5, %lo(0x01010101),%i5! finish loading Mycroft's magic2 197 ba .s1aligned4 ! s1 aligned now 198 andcc %i1, 3, %i3 ! s2 word-aligned ? 199 200 ! Here, we initialize our checks for a zero byte and decide 201 ! whether or not we can optimize further if we're fortunate 202 ! enough to have a word aligned desintation 203 204 .s1aligned1: 205 sethi %hi(0x01010101), %i5 ! start loading Mycroft's magic2 206 .s1aligned2: 207 or %i4, %lo(0x80808080),%i4! finish loading Mycroft's magic1 208 .s1aligned3: 209 or %i5, %lo(0x01010101),%i5! finish loading Mycroft's magic2 210 andcc %i1, 3, %i3 ! s2 word aligned ? 211 .s1aligned4: 212 sethi %hi(0x3f3f3f3f), %l2 ! load m2 for parallel tolower() 213 sethi %hi(0x25252525), %l3 ! load m3 for parallel tolower() 214 or %l2, %lo(0x3f3f3f3f),%l2! finish loading m2 215 bz .word4 ! yup, s2 word-aligned 216 or %l3, %lo(0x25252525),%l3! finish loading m3 217 218 add %i2, %i3, %i2 ! start adjusting offset s1-s2 219 sll %i3, 3, %l6 ! shift factor for left shifts 220 andn %i1, 3, %i1 ! round s1 pointer down to next word 221 sub %g0, %l6, %l7 ! shift factor for right shifts 222 orn %i3, %g0, %i3 ! generate all ones 223 lduw [%i1], %i0 ! new lower word from s2 224 srl %i3, %l6, %i3 ! mask for fixing up bytes 225 sll %i0, %l6, %g1 ! partial unaligned word from s2 226 orn %i0, %i3, %i0 ! force start bytes to non-zero 227 nop ! pad to align loop to 16-byte boundary 228 nop ! pad to align loop to 16-byte boundary 229 230 ! This is the comparision procedure used if the destination is not 231 ! word aligned, if it is, we use word4 & cmp4 232 233 .cmp: 234 andn %i4, %i0, %l4 ! ~word & 0x80808080 235 sub %i0, %i5, %l5 ! word - 0x01010101 236 andcc %l5, %l4, %g0 ! (word - 0x01010101) & ~word & 0x80808080 237 bz,a,pt %ncc, .doload ! null byte in previous aligned s2 word 238 lduw [%i1 + 4], %i0 ! load next aligned word from s2 239 .doload: 240 srl %i0, %l7, %i3 ! byte 1 from new aligned word from s2 241 or %g1, %i3, %g1 ! merge to get unaligned word from s2 242 lduw [%i1 + %i2], %i3 ! x1 = word from s1 243 andn %i3, %i4, %l0 ! q1 = x1 & ~m1 244 andn %g1, %i4, %l4 ! q2 = x2 & ~m1 245 add %l0, %l2, %l1 ! p1 = q1 + m2 246 add %l4, %l2, %l5 ! p2 = q2 + m2 247 add %l0, %l3, %l0 ! q1 = q1 + m3 248 add %l4, %l3, %l4 ! q2 = q2 + m3 249 andn %l1, %l0, %l1 ! p1 = p1 & ~q1 250 andn %l5, %l4, %l5 ! p2 = p2 & ~q2 251 andn %i4, %i3, %l0 ! q1 = m1 & ~x1 252 andn %i4, %g1, %l4 ! q2 = m1 & ~x2 253 and %l0, %l1, %l0 ! q1 = p1 & q1 254 and %l4, %l5, %l4 ! q2 = p2 & q2 255 srl %l0, 2, %l0 ! q1 = q1 >> 2 256 srl %l4, 2, %l4 ! q2 = q2 >> 2 257 add %l0, %i3, %i3 ! lowercase word from s1 258 add %l4, %g1, %g1 ! lowercase word from s2 259 cmp %i3, %g1 ! tolower(*s1) != tolower(*s2) ? 260 bne %icc, .wordsdiffer ! yup, now find byte that is different 261 add %i1, 4, %i1 ! s1+=4, s2+=4 262 andn %i4, %i3, %l4 ! ~word & 0x80808080 263 sub %i3, %i5, %l5 ! word - 0x01010101 264 andcc %l5, %l4, %g0 ! (word - 0x01010101) & ~word & 0x80808080 265 bz,pt %ncc, .cmp ! no null-byte in s1 yet 266 sll %i0, %l6, %g1 ! partial unaligned word from s2 267 268 ! words are equal but the end of s1 has been reached 269 ! this means the strings must be equal 270 .stringsequal: 271 ret ! return 272 restore %g0, %g0, %o0 ! return 0, i.e. strings are equal 273 nop ! pad 274 275 276 ! we have a word aligned source and destination! This means 277 ! things get to go fast! 278 279 .word4: 280 lduw [%i1 + %i2], %i3 ! x1 = word from s1 281 282 .cmp4: 283 andn %i3, %i4, %l0 ! q1 = x1 & ~m1 284 lduw [%i1], %g1 ! x2 = word from s2 285 andn %g1, %i4, %l4 ! q2 = x2 & ~m1 286 add %l0, %l2, %l1 ! p1 = q1 + m2 287 add %l4, %l2, %l5 ! p2 = q2 + m2 288 add %l0, %l3, %l0 ! q1 = q1 + m3 289 add %l4, %l3, %l4 ! q2 = q2 + m3 290 andn %l1, %l0, %l1 ! p1 = p1 & ~q1 291 andn %l5, %l4, %l5 ! p2 = p2 & ~q2 292 andn %i4, %i3, %l0 ! q1 = m1 & ~x1 293 andn %i4, %g1, %l4 ! q2 = m1 & ~x2 294 and %l0, %l1, %l0 ! q1 = p1 & q1 295 and %l4, %l5, %l4 ! q2 = p2 & q2 296 srl %l0, 2, %l0 ! q1 = q1 >> 2 297 srl %l4, 2, %l4 ! q2 = q2 >> 2 298 add %l0, %i3, %i3 ! lowercase word from s1 299 add %l4, %g1, %g1 ! lowercase word from s2 300 cmp %i3, %g1 ! tolower(*s1) != tolower(*s2) ? 301 bne,pn %icc, .wordsdiffer ! yup, now find mismatching character 302 add %i1, 4, %i1 ! s1+=4, s2+=4 303 andn %i4, %i3, %l4 ! ~word & 0x80808080 304 sub %i3, %i5, %l5 ! word - 0x01010101 305 andcc %l5, %l4, %g0 ! (word - 0x01010101) & ~word & 0x80808080 306 bz,a,pt %icc, .cmp4 ! no null-byte in s1 yet 307 lduw [%i1 + %i2], %i3 ! load word from s1 308 309 ! words are equal but the end of s1 has been reached 310 ! this means the strings must be equal 311 .stringsequal4: 312 ret ! return 313 restore %g0, %g0, %o0 ! return 0, i.e. strings are equal 314 315 .wordsdiffer: 316 srl %g1, 24, %i2 ! first byte of mismatching word in s2 317 srl %i3, 24, %i1 ! first byte of mismatching word in s1 318 subcc %i1, %i2, %i0 ! *s1-*s2 319 bnz,pn %ncc, .done ! bytes differ, return difference 320 srl %g1, 16, %i2 ! second byte of mismatching word in s2 321 andcc %i1, 0xff, %i0 ! *s1 == 0 ? 322 bz,pn %ncc, .done ! yup 323 324 ! we know byte 1 is equal, so can compare bytes 1,2 as a group 325 326 srl %i3, 16, %i1 ! second byte of mismatching word in s1 327 subcc %i1, %i2, %i0 ! *s1-*s2 328 bnz,pn %ncc, .done ! bytes differ, return difference 329 srl %g1, 8, %i2 ! third byte of mismatching word in s2 330 andcc %i1, 0xff, %i0 ! *s1 == 0 ? 331 bz,pn %ncc, .done ! yup 332 333 ! we know bytes 1, 2 are equal, so can compare bytes 1,2,3 as a group 334 335 srl %i3, 8, %i1 ! third byte of mismatching word in s1 336 subcc %i1, %i2, %i0 ! *s1-*s2 337 bnz,pn %ncc, .done ! bytes differ, return difference 338 andcc %i1, 0xff, %g0 ! *s1 == 0 ? 339 bz,pn %ncc, .stringsequal ! yup 340 341 ! we know bytes 1,2,3 are equal, so can compare bytes 1,2,3,4 as group 342 343 subcc %i3, %g1, %i0 ! *s1-*s2 344 bz,a .done ! bytes differ, return difference 345 andcc %i3, 0xff, %i0 ! *s1 == 0 ? 346 347 .done: 348 ret ! return 349 restore %i0, %g0, %o0 ! return tolower(*s1) - tolower(*s2) 350 351 SET_SIZE(strcasecmp) 352