1 /* 2 * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved. 3 * Use is subject to license terms. 4 * 5 * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T 6 * All Rights Reserved 7 * 8 * University Copyright- Copyright (c) 1982, 1986, 1988 9 * The Regents of the University of California 10 * All Rights Reserved 11 * 12 * University Acknowledgment- Portions of this document are derived from 13 * software developed by the University of California, Berkeley, and its 14 * contributors. 15 * 16 * Licensed under the Apache License, Version 2.0 (the "License"); 17 * you may not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * http://www.apache.org/licenses/LICENSE-2.0. 20 * 21 * Unless required by applicable law or agreed to in writing, software 22 * distributed under the License is distributed on an "AS IS" BASIS, 23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 24 * or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 /* Code moved from regexp.h */ 30 31 #include "apr.h" 32 #include "apr_lib.h" 33 #ifdef APR_HAVE_LIMITS_H 34 #include <limits.h> 35 #endif 36 #if APR_HAVE_STDLIB_H 37 #include <stdlib.h> 38 #endif 39 #include "libsed.h" 40 #include "regexp.h" 41 #include "sed.h" 42 43 #define GETC() ((unsigned char)*sp++) 44 #define PEEKC() ((unsigned char)*sp) 45 #define UNGETC(c) (--sp) 46 #define SEDCOMPILE_ERROR(c) { \ 47 regerrno = c; \ 48 goto out; \ 49 } 50 #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0) 51 #define uletter(c) (isalpha(c) || c == '_') 52 53 54 static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 55 56 static int regerr(sed_commands_t *commands, int err); 57 static void comperr(sed_commands_t *commands, char *msg); 58 static void getrnge(char *str, step_vars_storage *vars); 59 static int _advance(char *, char *, step_vars_storage *); 60 extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars); 61 62 63 static void comperr(sed_commands_t *commands, char *msg) 64 { 65 command_errf(commands, msg, commands->linebuf); 66 } 67 68 /* 69 */ 70 static int regerr(sed_commands_t *commands, int err) 71 { 72 switch(err) { 73 case 0: 74 /* No error */ 75 break; 76 case 11: 77 comperr(commands, "Range endpoint too large: %s"); 78 break; 79 80 case 16: 81 comperr(commands, "Bad number: %s"); 82 break; 83 84 case 25: 85 comperr(commands, "``\\digit'' out of range: %s"); 86 break; 87 88 case 36: 89 comperr(commands, "Illegal or missing delimiter: %s"); 90 break; 91 92 case 41: 93 comperr(commands, "No remembered search string: %s"); 94 break; 95 96 case 42: 97 comperr(commands, "\\( \\) imbalance: %s"); 98 break; 99 100 case 43: 101 comperr(commands, "Too many \\(: %s"); 102 break; 103 104 case 44: 105 comperr(commands, "More than 2 numbers given in \\{ \\}: %s"); 106 break; 107 108 case 45: 109 comperr(commands, "} expected after \\: %s"); 110 break; 111 112 case 46: 113 comperr(commands, "First number exceeds second in \\{ \\}: %s"); 114 break; 115 116 case 49: 117 comperr(commands, "[ ] imbalance: %s"); 118 break; 119 120 case 50: 121 comperr(commands, SEDERR_TMMES); 122 break; 123 124 default: 125 comperr(commands, "Unknown regexp error code %s\n"); 126 break; 127 } 128 return (0); 129 } 130 131 132 char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs, 133 char *ep, char *endbuf, int seof) 134 { 135 int c; 136 int eof = seof; 137 char *lastep; 138 int cclcnt; 139 char bracket[NBRA], *bracketp; 140 int closed; 141 int neg; 142 int lc; 143 int i, cflg; 144 int iflag; /* used for non-ascii characters in brackets */ 145 int nodelim = 0; 146 char *sp = commands->cp; 147 int regerrno = 0; 148 149 lastep = 0; 150 if ((c = GETC()) == eof || c == '\n') { 151 if (c == '\n') { 152 UNGETC(c); 153 nodelim = 1; 154 } 155 commands->cp = sp; 156 goto out; 157 } 158 bracketp = bracket; 159 compargs->circf = closed = compargs->nbra = 0; 160 if (c == '^') 161 compargs->circf++; 162 else 163 UNGETC(c); 164 while (1) { 165 if (ep >= endbuf) 166 SEDCOMPILE_ERROR(50); 167 c = GETC(); 168 if (c != '*' && ((c != '\\') || (PEEKC() != '{'))) 169 lastep = ep; 170 if (c == eof) { 171 *ep++ = CCEOF; 172 if (bracketp != bracket) 173 SEDCOMPILE_ERROR(42); 174 commands->cp = sp; 175 goto out; 176 } 177 switch (c) { 178 179 case '.': 180 *ep++ = CDOT; 181 continue; 182 183 case '\n': 184 SEDCOMPILE_ERROR(36); 185 commands->cp = sp; 186 goto out; 187 case '*': 188 if (lastep == 0 || *lastep == CBRA || *lastep == CKET) 189 goto defchar; 190 *lastep |= STAR; 191 continue; 192 193 case '$': 194 if (PEEKC() != eof && PEEKC() != '\n') 195 goto defchar; 196 *ep++ = CDOL; 197 continue; 198 199 case '[': 200 if (&ep[17] >= endbuf) 201 SEDCOMPILE_ERROR(50); 202 203 *ep++ = CCL; 204 lc = 0; 205 for (i = 0; i < 16; i++) 206 ep[i] = 0; 207 208 neg = 0; 209 if ((c = GETC()) == '^') { 210 neg = 1; 211 c = GETC(); 212 } 213 iflag = 1; 214 do { 215 c &= 0377; 216 if (c == '\0' || c == '\n') 217 SEDCOMPILE_ERROR(49); 218 if ((c & 0200) && iflag) { 219 iflag = 0; 220 if (&ep[32] >= endbuf) 221 SEDCOMPILE_ERROR(50); 222 ep[-1] = CXCL; 223 for (i = 16; i < 32; i++) 224 ep[i] = 0; 225 } 226 if (c == '-' && lc != 0) { 227 if ((c = GETC()) == ']') { 228 PLACE('-'); 229 break; 230 } 231 if ((c & 0200) && iflag) { 232 iflag = 0; 233 if (&ep[32] >= endbuf) 234 SEDCOMPILE_ERROR(50); 235 ep[-1] = CXCL; 236 for (i = 16; i < 32; i++) 237 ep[i] = 0; 238 } 239 while (lc < c) { 240 PLACE(lc); 241 lc++; 242 } 243 } 244 lc = c; 245 PLACE(c); 246 } while ((c = GETC()) != ']'); 247 248 if (iflag) 249 iflag = 16; 250 else 251 iflag = 32; 252 253 if (neg) { 254 if (iflag == 32) { 255 for (cclcnt = 0; cclcnt < iflag; 256 cclcnt++) 257 ep[cclcnt] ^= 0377; 258 ep[0] &= 0376; 259 } else { 260 ep[-1] = NCCL; 261 /* make nulls match so test fails */ 262 ep[0] |= 01; 263 } 264 } 265 266 ep += iflag; 267 268 continue; 269 270 case '\\': 271 switch (c = GETC()) { 272 273 case '(': 274 if (compargs->nbra >= NBRA) 275 SEDCOMPILE_ERROR(43); 276 *bracketp++ = compargs->nbra; 277 *ep++ = CBRA; 278 *ep++ = compargs->nbra++; 279 continue; 280 281 case ')': 282 if (bracketp <= bracket) 283 SEDCOMPILE_ERROR(42); 284 *ep++ = CKET; 285 *ep++ = *--bracketp; 286 closed++; 287 continue; 288 289 case '{': 290 if (lastep == (char *) 0) 291 goto defchar; 292 *lastep |= RNGE; 293 cflg = 0; 294 nlim: 295 c = GETC(); 296 i = 0; 297 do { 298 if ('0' <= c && c <= '9') 299 i = 10 * i + c - '0'; 300 else 301 SEDCOMPILE_ERROR(16); 302 } while (((c = GETC()) != '\\') && (c != ',')); 303 if (i >= 255) 304 SEDCOMPILE_ERROR(11); 305 *ep++ = i; 306 if (c == ',') { 307 if (cflg++) 308 SEDCOMPILE_ERROR(44); 309 if ((c = GETC()) == '\\') 310 *ep++ = (char) 255; 311 else { 312 UNGETC(c); 313 goto nlim; 314 /* get 2'nd number */ 315 } 316 } 317 if (GETC() != '}') 318 SEDCOMPILE_ERROR(45); 319 if (!cflg) /* one number */ 320 *ep++ = i; 321 else if ((ep[-1] & 0377) < (ep[-2] & 0377)) 322 SEDCOMPILE_ERROR(46); 323 continue; 324 325 case '\n': 326 SEDCOMPILE_ERROR(36); 327 328 case 'n': 329 c = '\n'; 330 goto defchar; 331 332 default: 333 if (c >= '1' && c <= '9') { 334 if ((c -= '1') >= closed) 335 SEDCOMPILE_ERROR(25); 336 *ep++ = CBACK; 337 *ep++ = c; 338 continue; 339 } 340 } 341 /* Drop through to default to use \ to turn off special chars */ 342 343 defchar: 344 default: 345 lastep = ep; 346 *ep++ = CCHR; 347 *ep++ = c; 348 } 349 } 350 out: 351 if (regerrno) { 352 regerr(commands, regerrno); 353 return (char*) NULL; 354 } 355 /* XXX : Basant : what extra */ 356 /* int reglength = (int)(ep - expbuf); */ 357 return ep; 358 } 359 360 int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars) 361 { 362 int c; 363 364 365 if (circf) { 366 vars->loc1 = p1; 367 return (_advance(p1, p2, vars)); 368 } 369 /* fast check for first character */ 370 if (*p2 == CCHR) { 371 c = p2[1]; 372 do { 373 if (*p1 != c) 374 continue; 375 if (_advance(p1, p2, vars)) { 376 vars->loc1 = p1; 377 return (1); 378 } 379 } while (*p1++); 380 return (0); 381 } 382 /* regular algorithm */ 383 do { 384 if (_advance(p1, p2, vars)) { 385 vars->loc1 = p1; 386 return (1); 387 } 388 } while (*p1++); 389 return (0); 390 } 391 392 static int _advance(char *lp, char *ep, step_vars_storage *vars) 393 { 394 char *curlp; 395 int c; 396 char *bbeg; 397 char neg; 398 int ct; 399 int epint; /* int value of *ep */ 400 401 while (1) { 402 neg = 0; 403 switch (*ep++) { 404 405 case CCHR: 406 if (*ep++ == *lp++) 407 continue; 408 return (0); 409 410 case CDOT: 411 if (*lp++) 412 continue; 413 return (0); 414 415 case CDOL: 416 if (*lp == 0) 417 continue; 418 return (0); 419 420 case CCEOF: 421 vars->loc2 = lp; 422 return (1); 423 424 case CXCL: 425 c = (unsigned char)*lp++; 426 if (ISTHERE(c)) { 427 ep += 32; 428 continue; 429 } 430 return (0); 431 432 case NCCL: 433 neg = 1; 434 435 case CCL: 436 c = *lp++; 437 if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) { 438 ep += 16; 439 continue; 440 } 441 return (0); 442 443 case CBRA: 444 epint = (int) *ep; 445 vars->braslist[epint] = lp; 446 ep++; 447 continue; 448 449 case CKET: 450 epint = (int) *ep; 451 vars->braelist[epint] = lp; 452 ep++; 453 continue; 454 455 case CCHR | RNGE: 456 c = *ep++; 457 getrnge(ep, vars); 458 while (vars->low--) 459 if (*lp++ != c) 460 return (0); 461 curlp = lp; 462 while (vars->size--) 463 if (*lp++ != c) 464 break; 465 if (vars->size < 0) 466 lp++; 467 ep += 2; 468 goto star; 469 470 case CDOT | RNGE: 471 getrnge(ep, vars); 472 while (vars->low--) 473 if (*lp++ == '\0') 474 return (0); 475 curlp = lp; 476 while (vars->size--) 477 if (*lp++ == '\0') 478 break; 479 if (vars->size < 0) 480 lp++; 481 ep += 2; 482 goto star; 483 484 case CXCL | RNGE: 485 getrnge(ep + 32, vars); 486 while (vars->low--) { 487 c = (unsigned char)*lp++; 488 if (!ISTHERE(c)) 489 return (0); 490 } 491 curlp = lp; 492 while (vars->size--) { 493 c = (unsigned char)*lp++; 494 if (!ISTHERE(c)) 495 break; 496 } 497 if (vars->size < 0) 498 lp++; 499 ep += 34; /* 32 + 2 */ 500 goto star; 501 502 case NCCL | RNGE: 503 neg = 1; 504 505 case CCL | RNGE: 506 getrnge(ep + 16, vars); 507 while (vars->low--) { 508 c = *lp++; 509 if (((c & 0200) || !ISTHERE(c)) ^ neg) 510 return (0); 511 } 512 curlp = lp; 513 while (vars->size--) { 514 c = *lp++; 515 if (((c & 0200) || !ISTHERE(c)) ^ neg) 516 break; 517 } 518 if (vars->size < 0) 519 lp++; 520 ep += 18; /* 16 + 2 */ 521 goto star; 522 523 case CBACK: 524 epint = (int) *ep; 525 bbeg = vars->braslist[epint]; 526 ct = vars->braelist[epint] - bbeg; 527 ep++; 528 529 if (ecmp(bbeg, lp, ct)) { 530 lp += ct; 531 continue; 532 } 533 return (0); 534 535 case CBACK | STAR: 536 epint = (int) *ep; 537 bbeg = vars->braslist[epint]; 538 ct = vars->braelist[epint] - bbeg; 539 ep++; 540 curlp = lp; 541 while (ecmp(bbeg, lp, ct)) 542 lp += ct; 543 544 while (lp >= curlp) { 545 if (_advance(lp, ep, vars)) 546 return (1); 547 lp -= ct; 548 } 549 return (0); 550 551 552 case CDOT | STAR: 553 curlp = lp; 554 while (*lp++); 555 goto star; 556 557 case CCHR | STAR: 558 curlp = lp; 559 while (*lp++ == *ep); 560 ep++; 561 goto star; 562 563 case CXCL | STAR: 564 curlp = lp; 565 do { 566 c = (unsigned char)*lp++; 567 } while (ISTHERE(c)); 568 ep += 32; 569 goto star; 570 571 case NCCL | STAR: 572 neg = 1; 573 574 case CCL | STAR: 575 curlp = lp; 576 do { 577 c = *lp++; 578 } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg); 579 ep += 16; 580 goto star; 581 582 star: 583 do { 584 if (--lp == vars->locs) 585 break; 586 if (_advance(lp, ep, vars)) 587 return (1); 588 } while (lp > curlp); 589 return (0); 590 591 } 592 } 593 } 594 595 static void getrnge(char *str, step_vars_storage *vars) 596 { 597 vars->low = *str++ & 0377; 598 vars->size = ((*str & 0377) == 255)? 20000: (*str &0377) - vars->low; 599 } 600 601 602