/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1997 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#ifndef _REGEXP_H
#define	_REGEXP_H

#pragma ident "%Z%%M% %I% %E% SMI"

#include <ctype.h>
#include <string.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Simple regular expression compiler (compile) and matcher (step, advance).
 *
 * This header uses, but does not define, the following macros; they must
 * be visible at the point where the header is included:
 *
 *	INIT		expanded at the very top of compile(), before its
 *			own declarations; it can see the `instring' argument
 *	GETC()		yields the next pattern character
 *	PEEKC()		yields the next pattern character without consuming it
 *	UNGETC(c)	pushes the character `c' back
 *	RETURN(ptr)	invoked on success with the first unused byte of the
 *			compiled expression
 *	ERROR(val)	invoked with a numeric code when compilation fails
 *
 * ERROR() codes raised by compile():
 *	11	interval count is 255 or larger
 *	16	non-digit inside \{ \}
 *	25	\digit refers to a group that has not been closed yet
 *	36	newline in the pattern (escaped, or unescaped when `sed' is set)
 *	41	empty pattern, *ep == 0 and `sed' is not set
 *	42	\( \) imbalance
 *	43	more than NBRA \( groups
 *	44	more than two numbers inside \{ \}
 *	45	} expected after the closing backslash of \{ \}
 *	46	first number exceeds the second inside \{ \}
 *	49	NUL or newline inside [ ]
 *	50	compiled expression does not fit below `endbuf'
 */

/* Opcodes of the compiled expression. */
#define	CBRA	2	/* \( : followed by the group number */
#define	CCHR	4	/* literal character: followed by the character */
#define	CDOT	8	/* . */
#define	CCL	12	/* [...] : followed by a 16 byte bitmap */
#define	CXCL	16	/* [...] with 8-bit members: 32 byte bitmap */
#define	CDOL	20	/* $ at the end of the pattern */
#define	CCEOF	22	/* end of the compiled expression */
#define	CKET	24	/* \) : followed by the group number */
#define	CBACK	36	/* \digit : followed by the group number */
#define	NCCL	40	/* [^...] : followed by a 16 byte bitmap */

/* Modifiers or'ed into the opcode of the preceding item. */
#define	STAR	01	/* item followed by * */
#define	RNGE	03	/* item followed by \{m,n\}; two count bytes follow */

#define	NBRA	9	/* maximum number of \( \) groups */

/*
 * Bitmap helpers.  Both operate on the bitmap that the local variable `ep'
 * of the calling function currently points at.
 */
#define	PLACE(c)	(ep[(c) >> 3] |= bittab[(c) & 07])
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])
#define	ecmp(s1, s2, n)	(strncmp((s1), (s2), (n)) == 0)

static char	*braslist[NBRA];	/* start of each group's match */
static char	*braelist[NBRA];	/* end of each group's match */
int	sed, nbra;
char	*loc1, *loc2, *locs;
static int	nodelim;	/* set when a newline ended the pattern */

int	circf;			/* set when the pattern started with ^ */
static int	low;		/* getrnge(): minimum repeat count */
static int	size;		/* getrnge(): optional extra repeats */

static const unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };

int advance(char *lp, char *ep);
static void getrnge(const char *str);

/*
 * Compile the pattern delivered by GETC() into the buffer [ep, endbuf).
 *
 * instring	not used directly by this function; it is in scope for the
 *		INIT macro
 * ep		start of the output buffer
 * endbuf	first byte past the output buffer
 * seof		character that terminates the pattern
 *
 * Success is reported through RETURN(), failure through ERROR(); what the
 * caller finally receives depends on how those macros are defined.
 * As side effects circf and nbra are reset and then updated, and nodelim is
 * set when the pattern was ended by a newline instead of `seof'.
 */
char *
compile(char *instring, char *ep, char *endbuf, int seof)
{
	INIT	/* Dependent declarations and initializations */
	int c;
	int eof = seof;
	/* NOTE(review): initializer is overwritten below; kept as found */
	char *lastep = instring;
	int cclcnt;
	char bracket[NBRA], *bracketp;
	int closed;
	int neg;
	int lc;
	int i, cflg;
	int iflag; /* used for non-ascii characters in brackets */

	lastep = 0;
	if ((c = GETC()) == eof || c == '\n') {
		/* Empty pattern. */
		if (c == '\n') {
			UNGETC(c);
			nodelim = 1;
		}
		if (*ep == 0 && !sed)
			ERROR(41);
		RETURN(ep);
	}
	bracketp = bracket;
	circf = closed = nbra = 0;
	if (c == '^')
		circf++;
	else
		UNGETC(c);
	while (1) {
		if (ep >= endbuf)
			ERROR(50);
		c = GETC();
		/*
		 * lastep tracks the item a following * or \{ applies to, so
		 * it must not move when the current token is one of those.
		 */
		if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
			lastep = ep;
		if (c == eof) {
			*ep++ = CCEOF;
			if (bracketp != bracket)
				ERROR(42);
			RETURN(ep);
		}
		switch (c) {

		case '.':
			*ep++ = CDOT;
			continue;

		case '\n':
			if (!sed) {
				UNGETC(c);
				*ep++ = CCEOF;
				nodelim = 1;
				if (bracketp != bracket)
					ERROR(42);
				RETURN(ep);
			} else ERROR(36);
			/* FALLTHROUGH (only if ERROR() does not leave) */
		case '*':
			/* A * with nothing to repeat is a literal. */
			if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
				goto defchar;
			*lastep |= STAR;
			continue;

		case '$':
			/* $ is special only as the last pattern character. */
			if (PEEKC() != eof && PEEKC() != '\n')
				goto defchar;
			*ep++ = CDOL;
			continue;

		case '[':
			/* Opcode plus a 16 byte bitmap. */
			if (endbuf - ep <= 17)
				ERROR(50);

			*ep++ = CCL;
			lc = 0;
			for (i = 0; i < 16; i++)
				ep[i] = 0;

			neg = 0;
			if ((c = GETC()) == '^') {
				neg = 1;
				c = GETC();
			}
			iflag = 1;
			do {
				c &= 0377;
				if (c == '\0' || c == '\n')
					ERROR(49);
				if ((c & 0200) && iflag) {
					/* First 8-bit member: widen to CXCL. */
					iflag = 0;
					if (endbuf - ep <= 32)
						ERROR(50);
					ep[-1] = CXCL;
					for (i = 16; i < 32; i++)
						ep[i] = 0;
				}
				if (c == '-' && lc != 0) {
					if ((c = GETC()) == ']') {
						/* Trailing - is a literal. */
						PLACE('-');
						break;
					}
					/*
					 * Mask the range end exactly like
					 * every other member; a negative
					 * value would index in front of the
					 * bitmap.
					 */
					c &= 0377;
					/*
					 * A NUL here means the pattern ended
					 * inside the brackets; going on would
					 * read beyond it.
					 */
					if (c == '\0')
						ERROR(49);
					if ((c & 0200) && iflag) {
						iflag = 0;
						if (endbuf - ep <= 32)
							ERROR(50);
						ep[-1] = CXCL;
						for (i = 16; i < 32; i++)
							ep[i] = 0;
					}
					while (lc < c) {
						PLACE(lc);
						lc++;
					}
				}
				lc = c;
				PLACE(c);
			} while ((c = GETC()) != ']');

			/* From here on iflag is the bitmap length. */
			if (iflag)
				iflag = 16;
			else
				iflag = 32;

			if (neg) {
				if (iflag == 32) {
					/* CXCL has no negated opcode. */
					for (cclcnt = 0; cclcnt < iflag;
					    cclcnt++)
						ep[cclcnt] ^= 0377;
					ep[0] &= 0376;
				} else {
					ep[-1] = NCCL;
					/* make nulls match so test fails */
					ep[0] |= 01;
				}
			}

			ep += iflag;

			continue;

		case '\\':
			switch (c = GETC()) {

			case '(':
				if (nbra >= NBRA)
					ERROR(43);
				/*
				 * Two bytes are emitted; verify both fit
				 * before writing (same ERROR the loop top
				 * would raise, minus the stray byte).
				 */
				if (endbuf - ep <= 2)
					ERROR(50);
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if (bracketp <= bracket)
					ERROR(42);
				if (endbuf - ep <= 2)
					ERROR(50);
				*ep++ = CKET;
				*ep++ = *--bracketp;
				closed++;
				continue;

			case '{':
				if (lastep == (char *) 0)
					goto defchar;
				*lastep |= RNGE;
				cflg = 0;
			nlim:
				c = GETC();
				i = 0;
				do {
					if ('0' <= c && c <= '9') {
						/*
						 * Stop accumulating once out
						 * of range so that a long
						 * digit string cannot
						 * overflow i.
						 */
						if (i < 255)
							i = 10 * i + (c - '0');
					} else
						ERROR(16);
				} while (((c = GETC()) != '\\') && (c != ','));
				if (i >= 255)
					ERROR(11);
				/* Both count bytes must fit. */
				if (cflg == 0 && endbuf - ep <= 2)
					ERROR(50);
				*ep++ = i;
				if (c == ',') {
					if (cflg++)
						ERROR(44);
					if ((c = GETC()) == '\\')
						*ep++ = 255;
					else {
						UNGETC(c);
						goto nlim;
						/* get 2'nd number */
					}
				}
				if (GETC() != '}')
					ERROR(45);
				if (!cflg)	/* one number */
					*ep++ = i;
				else if ((ep[-1] & 0377) < (ep[-2] & 0377))
					ERROR(46);
				continue;

			case '\n':
				ERROR(36);
				/* FALLTHROUGH (only if ERROR() does not leave) */

			case 'n':
				c = '\n';
				goto defchar;

			default:
				if (c >= '1' && c <= '9') {
					if ((c -= '1') >= closed)
						ERROR(25);
					if (endbuf - ep <= 2)
						ERROR(50);
					*ep++ = CBACK;
					*ep++ = c;
					continue;
				}
			}
	/* Drop through to default to use \ to turn off special chars */

		defchar:
		default:
			lastep = ep;
			if (endbuf - ep <= 2)
				ERROR(50);
			*ep++ = CCHR;
			*ep++ = c;
		}
	}
}

/*
 * Search the string p1 for the compiled expression p2.
 *
 * Returns 1 and sets loc1 to the start of the match (loc2 is set by
 * advance()) when a match is found, 0 otherwise.  When circf is set only
 * the beginning of p1 is tried.
 */
int
step(char *p1, char *p2)
{
	int c;


	if (circf) {
		loc1 = p1;
		return (advance(p1, p2));
	}
	/* fast check for first character */
	if (*p2 == CCHR) {
		c = p2[1];
		do {
			if (*p1 != c)
				continue;
			if (advance(p1, p2)) {
				loc1 = p1;
				return (1);
			}
		} while (*p1++);
		return (0);
	}
	/* regular algorithm */
	do {
		if (advance(p1, p2)) {
			loc1 = p1;
			return (1);
		}
	} while (*p1++);
	return (0);
}

/*
 * Match the compiled expression ep against the string starting exactly
 * at lp.
 *
 * Returns 1 and sets loc2 to the first character after the match, or 0
 * when the expression does not match at lp.  braslist[] and braelist[]
 * are updated as \( and \) are passed.
 */
int
advance(char *lp, char *ep)
{
	char *curlp;
	int c;
	char *bbeg;
	char neg;
	int ct;

	while (1) {
		neg = 0;
		switch (*ep++) {

		case CCHR:
			if (*ep++ == *lp++)
				continue;
			return (0);

		case CDOT:
			if (*lp++)
				continue;
			return (0);

		case CDOL:
			if (*lp == 0)
				continue;
			return (0);

		case CCEOF:
			loc2 = lp;
			return (1);

		case CXCL:
			c = (unsigned char)*lp++;
			if (ISTHERE(c)) {
				ep += 32;
				continue;
			}
			return (0);

		case NCCL:
			neg = 1;
			/* FALLTHROUGH */

		case CCL:
			/* 16 byte bitmap: 8-bit characters are never members */
			c = *lp++;
			if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
				ep += 16;
				continue;
			}
			return (0);

		case CBRA:
			braslist[*ep++] = lp;
			continue;

		case CKET:
			braelist[*ep++] = lp;
			continue;

		/*
		 * The RNGE cases first consume the `low' mandatory
		 * repetitions, then greedily up to `size' optional ones, and
		 * leave lp one past the position the backtracking loop at
		 * `star' tries first.
		 */
		case CCHR | RNGE:
			c = *ep++;
			getrnge(ep);
			while (low--)
				if (*lp++ != c)
					return (0);
			curlp = lp;
			while (size--)
				if (*lp++ != c)
					break;
			if (size < 0)
				lp++;
			ep += 2;
			goto star;

		case CDOT | RNGE:
			getrnge(ep);
			while (low--)
				if (*lp++ == '\0')
					return (0);
			curlp = lp;
			while (size--)
				if (*lp++ == '\0')
					break;
			if (size < 0)
				lp++;
			ep += 2;
			goto star;

		case CXCL | RNGE:
			getrnge(ep + 32);
			while (low--) {
				c = (unsigned char)*lp++;
				if (!ISTHERE(c))
					return (0);
			}
			curlp = lp;
			while (size--) {
				c = (unsigned char)*lp++;
				if (!ISTHERE(c))
					break;
			}
			if (size < 0)
				lp++;
			ep += 34;		/* 32 + 2 */
			goto star;

		case NCCL | RNGE:
			neg = 1;
			/* FALLTHROUGH */

		case CCL | RNGE:
			getrnge(ep + 16);
			while (low--) {
				c = *lp++;
				if (((c & 0200) || !ISTHERE(c)) ^ neg)
					return (0);
			}
			curlp = lp;
			while (size--) {
				c = *lp++;
				if (((c & 0200) || !ISTHERE(c)) ^ neg)
					break;
			}
			if (size < 0)
				lp++;
			ep += 18;		/* 16 + 2 */
			goto star;

		case CBACK:
			bbeg = braslist[*ep];
			ct = (int)(braelist[*ep++] - bbeg);

			if (ecmp(bbeg, lp, ct)) {
				lp += ct;
				continue;
			}
			return (0);

		case CBACK | STAR:
			bbeg = braslist[*ep];
			ct = (int)(braelist[*ep++] - bbeg);
			/*
			 * An empty group repeats without consuming anything,
			 * so both loops below would never terminate.  Any
			 * number of repetitions equals none: just go on with
			 * the rest of the expression.
			 */
			if (ct <= 0)
				continue;
			curlp = lp;
			while (ecmp(bbeg, lp, ct))
				lp += ct;

			/* Give back one repetition at a time. */
			while (lp >= curlp) {
				if (advance(lp, ep))
					return (1);
				lp -= ct;
			}
			return (0);


		case CDOT | STAR:
			curlp = lp;
			while (*lp++)
				;
			goto star;

		case CCHR | STAR:
			curlp = lp;
			while (*lp++ == *ep)
				;
			ep++;
			goto star;

		case CXCL | STAR:
			curlp = lp;
			do {
				c = (unsigned char)*lp++;
			} while (ISTHERE(c));
			ep += 32;
			goto star;

		case NCCL | STAR:
			neg = 1;
			/* FALLTHROUGH */

		case CCL | STAR:
			curlp = lp;
			do {
				c = *lp++;
			} while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
			ep += 16;
			goto star;

		star:
			/*
			 * Back off one character at a time from the longest
			 * repetition, trying the rest of the expression at
			 * each position; stop at curlp or when lp hits locs.
			 */
			do {
				if (--lp == locs)
					break;
				if (advance(lp, ep))
					return (1);
			} while (lp > curlp);
			return (0);

		}
	}
}

/*
 * Decode the two count bytes of a \{m,n\} interval at str into the file
 * scope variables `low' (m) and `size' (n - m).  A second byte of 255
 * stands for "no upper bound" and yields a size of 20000.
 */
static void
getrnge(const char *str)
{
	low = *str++ & 0377;
	size = ((*str & 0377) == 255) ? 20000 : (*str & 0377) - low;
}

#ifdef	__cplusplus
}
#endif

#endif	/* _REGEXP_H */