1 0 jyri /* 2 0 jyri * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved. 3 0 jyri * Use is subject to license terms. 4 0 jyri * 5 0 jyri * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T 6 0 jyri * All Rights Reserved 7 0 jyri * 8 0 jyri * University Copyright- Copyright (c) 1982, 1986, 1988 9 0 jyri * The Regents of the University of California 10 0 jyri * All Rights Reserved 11 0 jyri * 12 0 jyri * University Acknowledgment- Portions of this document are derived from 13 0 jyri * software developed by the University of California, Berkeley, and its 14 0 jyri * contributors. 15 0 jyri * 16 0 jyri * Licensed under the Apache License, Version 2.0 (the "License"); 17 0 jyri * you may not use this file except in compliance with the License. 18 0 jyri * You may obtain a copy of the License at 19 0 jyri * http://www.apache.org/licenses/LICENSE-2.0. 20 0 jyri * 21 0 jyri * Unless required by applicable law or agreed to in writing, software 22 0 jyri * distributed under the License is distributed on an "AS IS" BASIS, 23 0 jyri * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 24 0 jyri * or implied. 25 0 jyri * See the License for the specific language governing permissions and 26 0 jyri * limitations under the License. 27 0 jyri */ 28 0 jyri 29 0 jyri /* Code moved from regexp.h */ 30 0 jyri 31 0 jyri #include "apr.h" 32 0 jyri #include "apr_lib.h" 33 0 jyri #ifdef APR_HAVE_LIMITS_H 34 0 jyri #include <limits.h> 35 0 jyri #endif 36 0 jyri #if APR_HAVE_STDLIB_H 37 0 jyri #include <stdlib.h> 38 0 jyri #endif 39 0 jyri #include "libsed.h" 40 0 jyri #include "regexp.h" 41 0 jyri #include "sed.h" 42 0 jyri 43 0 jyri #define GETC() ((unsigned char)*sp++) 44 0 jyri #define PEEKC() ((unsigned char)*sp) 45 0 jyri #define UNGETC(c) (--sp) 46 0 jyri #define SEDCOMPILE_ERROR(c) { \ 47 0 jyri regerrno = c; \ 48 0 jyri goto out; \ 49 0 jyri } 50 0 jyri #define ecmp(s1, s2, n) (strncmp(s1, s2, n) == 0) 51 0 jyri #define uletter(c) (isalpha(c) || c == '_') 52 0 jyri 53 0 jyri 54 0 jyri static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 }; 55 0 jyri 56 0 jyri static int regerr(sed_commands_t *commands, int err); 57 0 jyri static void comperr(sed_commands_t *commands, char *msg); 58 0 jyri static void getrnge(char *str, step_vars_storage *vars); 59 0 jyri static int _advance(char *, char *, step_vars_storage *); 60 0 jyri extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars); 61 0 jyri 62 0 jyri 63 0 jyri static void comperr(sed_commands_t *commands, char *msg) 64 0 jyri { 65 0 jyri command_errf(commands, msg, commands->linebuf); 66 0 jyri } 67 0 jyri 68 0 jyri /* 69 0 jyri */ 70 0 jyri static int regerr(sed_commands_t *commands, int err) 71 0 jyri { 72 0 jyri switch(err) { 73 0 jyri case 0: 74 0 jyri /* No error */ 75 0 jyri break; 76 0 jyri case 11: 77 0 jyri comperr(commands, "Range endpoint too large: %s"); 78 0 jyri break; 79 0 jyri 80 0 jyri case 16: 81 0 jyri comperr(commands, "Bad number: %s"); 82 0 jyri break; 83 0 jyri 84 0 jyri case 25: 85 0 jyri comperr(commands, "``\\digit'' out of range: %s"); 86 0 jyri break; 87 0 jyri 88 0 jyri case 36: 89 0 jyri comperr(commands, "Illegal or missing delimiter: %s"); 90 0 jyri break; 91 0 jyri 92 0 jyri case 41: 93 0 jyri comperr(commands, "No remembered search string: %s"); 94 0 jyri break; 95 0 jyri 96 0 jyri case 42: 97 0 jyri comperr(commands, "\\( \\) imbalance: %s"); 98 0 jyri break; 99 0 jyri 100 0 jyri case 43: 101 0 jyri comperr(commands, "Too many \\(: %s"); 102 0 jyri break; 103 0 jyri 104 0 jyri case 44: 105 0 jyri comperr(commands, "More than 2 numbers given in \\{ \\}: %s"); 106 0 jyri break; 107 0 jyri 108 0 jyri case 45: 109 0 jyri comperr(commands, "} expected after \\: %s"); 110 0 jyri break; 111 0 jyri 112 0 jyri case 46: 113 0 jyri comperr(commands, "First number exceeds second in \\{ \\}: %s"); 114 0 jyri break; 115 0 jyri 116 0 jyri case 49: 117 0 jyri comperr(commands, "[ ] imbalance: %s"); 118 0 jyri break; 119 0 jyri 120 0 jyri case 50: 121 0 jyri comperr(commands, SEDERR_TMMES); 122 0 jyri break; 123 0 jyri 124 0 jyri default: 125 0 jyri comperr(commands, "Unknown regexp error code %s\n"); 126 0 jyri break; 127 0 jyri } 128 0 jyri return (0); 129 0 jyri } 130 0 jyri 131 0 jyri 132 0 jyri char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs, 133 0 jyri char *ep, char *endbuf, int seof) 134 0 jyri { 135 0 jyri int c; 136 0 jyri int eof = seof; 137 0 jyri char *lastep; 138 0 jyri int cclcnt; 139 0 jyri char bracket[NBRA], *bracketp; 140 0 jyri int closed; 141 0 jyri int neg; 142 0 jyri int lc; 143 0 jyri int i, cflg; 144 0 jyri int iflag; /* used for non-ascii characters in brackets */ 145 0 jyri int nodelim = 0; 146 0 jyri char *sp = commands->cp; 147 0 jyri int regerrno = 0; 148 0 jyri 149 0 jyri lastep = 0; 150 0 jyri if ((c = GETC()) == eof || c == '\n') { 151 0 jyri if (c == '\n') { 152 0 jyri UNGETC(c); 153 0 jyri nodelim = 1; 154 0 jyri } 155 0 jyri commands->cp = sp; 156 0 jyri goto out; 157 0 jyri } 158 0 jyri bracketp = bracket; 159 0 jyri compargs->circf = closed = compargs->nbra = 0; 160 0 jyri if (c == '^') 161 0 jyri compargs->circf++; 162 0 jyri else 163 0 jyri UNGETC(c); 164 0 jyri while (1) { 165 0 jyri if (ep >= endbuf) 166 0 jyri SEDCOMPILE_ERROR(50); 167 0 jyri c = GETC(); 168 0 jyri if (c != '*' && ((c != '\\') || (PEEKC() != '{'))) 169 0 jyri lastep = ep; 170 0 jyri if (c == eof) { 171 0 jyri *ep++ = CCEOF; 172 0 jyri if (bracketp != bracket) 173 0 jyri SEDCOMPILE_ERROR(42); 174 0 jyri commands->cp = sp; 175 0 jyri goto out; 176 0 jyri } 177 0 jyri switch (c) { 178 0 jyri 179 0 jyri case '.': 180 0 jyri *ep++ = CDOT; 181 0 jyri continue; 182 0 jyri 183 0 jyri case '\n': 184 0 jyri SEDCOMPILE_ERROR(36); 185 0 jyri commands->cp = sp; 186 0 jyri goto out; 187 0 jyri case '*': 188 0 jyri if (lastep == 0 || *lastep == CBRA || *lastep == CKET) 189 0 jyri goto defchar; 190 0 jyri *lastep |= STAR; 191 0 jyri continue; 192 0 jyri 193 0 jyri case '$': 194 0 jyri if (PEEKC() != eof && PEEKC() != '\n') 195 0 jyri goto defchar; 196 0 jyri *ep++ = CDOL; 197 0 jyri continue; 198 0 jyri 199 0 jyri case '[': 200 0 jyri if (&ep[17] >= endbuf) 201 0 jyri SEDCOMPILE_ERROR(50); 202 0 jyri 203 0 jyri *ep++ = CCL; 204 0 jyri lc = 0; 205 0 jyri for (i = 0; i < 16; i++) 206 0 jyri ep[i] = 0; 207 0 jyri 208 0 jyri neg = 0; 209 0 jyri if ((c = GETC()) == '^') { 210 0 jyri neg = 1; 211 0 jyri c = GETC(); 212 0 jyri } 213 0 jyri iflag = 1; 214 0 jyri do { 215 0 jyri c &= 0377; 216 0 jyri if (c == '\0' || c == '\n') 217 0 jyri SEDCOMPILE_ERROR(49); 218 0 jyri if ((c & 0200) && iflag) { 219 0 jyri iflag = 0; 220 0 jyri if (&ep[32] >= endbuf) 221 0 jyri SEDCOMPILE_ERROR(50); 222 0 jyri ep[-1] = CXCL; 223 0 jyri for (i = 16; i < 32; i++) 224 0 jyri ep[i] = 0; 225 0 jyri } 226 0 jyri if (c == '-' && lc != 0) { 227 0 jyri if ((c = GETC()) == ']') { 228 0 jyri PLACE('-'); 229 0 jyri break; 230 0 jyri } 231 0 jyri if ((c & 0200) && iflag) { 232 0 jyri iflag = 0; 233 0 jyri if (&ep[32] >= endbuf) 234 0 jyri SEDCOMPILE_ERROR(50); 235 0 jyri ep[-1] = CXCL; 236 0 jyri for (i = 16; i < 32; i++) 237 0 jyri ep[i] = 0; 238 0 jyri } 239 0 jyri while (lc < c) { 240 0 jyri PLACE(lc); 241 0 jyri lc++; 242 0 jyri } 243 0 jyri } 244 0 jyri lc = c; 245 0 jyri PLACE(c); 246 0 jyri } while ((c = GETC()) != ']'); 247 0 jyri 248 0 jyri if (iflag) 249 0 jyri iflag = 16; 250 0 jyri else 251 0 jyri iflag = 32; 252 0 jyri 253 0 jyri if (neg) { 254 0 jyri if (iflag == 32) { 255 0 jyri for (cclcnt = 0; cclcnt < iflag; 256 0 jyri cclcnt++) 257 0 jyri ep[cclcnt] ^= 0377; 258 0 jyri ep[0] &= 0376; 259 0 jyri } else { 260 0 jyri ep[-1] = NCCL; 261 0 jyri /* make nulls match so test fails */ 262 0 jyri ep[0] |= 01; 263 0 jyri } 264 0 jyri } 265 0 jyri 266 0 jyri ep += iflag; 267 0 jyri 268 0 jyri continue; 269 0 jyri 270 0 jyri case '\\': 271 0 jyri switch (c = GETC()) { 272 0 jyri 273 0 jyri case '(': 274 0 jyri if (compargs->nbra >= NBRA) 275 0 jyri SEDCOMPILE_ERROR(43); 276 0 jyri *bracketp++ = compargs->nbra; 277 0 jyri *ep++ = CBRA; 278 0 jyri *ep++ = compargs->nbra++; 279 0 jyri continue; 280 0 jyri 281 0 jyri case ')': 282 0 jyri if (bracketp <= bracket) 283 0 jyri SEDCOMPILE_ERROR(42); 284 0 jyri *ep++ = CKET; 285 0 jyri *ep++ = *--bracketp; 286 0 jyri closed++; 287 0 jyri continue; 288 0 jyri 289 0 jyri case '{': 290 0 jyri if (lastep == (char *) 0) 291 0 jyri goto defchar; 292 0 jyri *lastep |= RNGE; 293 0 jyri cflg = 0; 294 0 jyri nlim: 295 0 jyri c = GETC(); 296 0 jyri i = 0; 297 0 jyri do { 298 0 jyri if ('0' <= c && c <= '9') 299 0 jyri i = 10 * i + c - '0'; 300 0 jyri else 301 0 jyri SEDCOMPILE_ERROR(16); 302 0 jyri } while (((c = GETC()) != '\\') && (c != ',')); 303 0 jyri if (i >= 255) 304 0 jyri SEDCOMPILE_ERROR(11); 305 0 jyri *ep++ = i; 306 0 jyri if (c == ',') { 307 0 jyri if (cflg++) 308 0 jyri SEDCOMPILE_ERROR(44); 309 0 jyri if ((c = GETC()) == '\\') 310 5 basantk *ep++ = (char) 255; 311 0 jyri else { 312 0 jyri UNGETC(c); 313 0 jyri goto nlim; 314 0 jyri /* get 2'nd number */ 315 0 jyri } 316 0 jyri } 317 0 jyri if (GETC() != '}') 318 0 jyri SEDCOMPILE_ERROR(45); 319 0 jyri if (!cflg) /* one number */ 320 0 jyri *ep++ = i; 321 0 jyri else if ((ep[-1] & 0377) < (ep[-2] & 0377)) 322 0 jyri SEDCOMPILE_ERROR(46); 323 0 jyri continue; 324 0 jyri 325 0 jyri case '\n': 326 0 jyri SEDCOMPILE_ERROR(36); 327 0 jyri 328 0 jyri case 'n': 329 0 jyri c = '\n'; 330 0 jyri goto defchar; 331 0 jyri 332 0 jyri default: 333 0 jyri if (c >= '1' && c <= '9') { 334 0 jyri if ((c -= '1') >= closed) 335 0 jyri SEDCOMPILE_ERROR(25); 336 0 jyri *ep++ = CBACK; 337 0 jyri *ep++ = c; 338 0 jyri continue; 339 0 jyri } 340 0 jyri } 341 0 jyri /* Drop through to default to use \ to turn off special chars */ 342 0 jyri 343 0 jyri defchar: 344 0 jyri default: 345 0 jyri lastep = ep; 346 0 jyri *ep++ = CCHR; 347 0 jyri *ep++ = c; 348 0 jyri } 349 0 jyri } 350 0 jyri out: 351 0 jyri if (regerrno) { 352 0 jyri regerr(commands, regerrno); 353 0 jyri return (char*) NULL; 354 0 jyri } 355 0 jyri /* XXX : Basant : what extra */ 356 0 jyri /* int reglength = (int)(ep - expbuf); */ 357 0 jyri return ep; 358 0 jyri } 359 0 jyri 360 0 jyri int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars) 361 0 jyri { 362 0 jyri int c; 363 0 jyri 364 0 jyri 365 0 jyri if (circf) { 366 0 jyri vars->loc1 = p1; 367 0 jyri return (_advance(p1, p2, vars)); 368 0 jyri } 369 0 jyri /* fast check for first character */ 370 0 jyri if (*p2 == CCHR) { 371 0 jyri c = p2[1]; 372 0 jyri do { 373 0 jyri if (*p1 != c) 374 0 jyri continue; 375 0 jyri if (_advance(p1, p2, vars)) { 376 0 jyri vars->loc1 = p1; 377 0 jyri return (1); 378 0 jyri } 379 0 jyri } while (*p1++); 380 0 jyri return (0); 381 0 jyri } 382 0 jyri /* regular algorithm */ 383 0 jyri do { 384 0 jyri if (_advance(p1, p2, vars)) { 385 0 jyri vars->loc1 = p1; 386 0 jyri return (1); 387 0 jyri } 388 0 jyri } while (*p1++); 389 0 jyri return (0); 390 0 jyri } 391 0 jyri 392 0 jyri static int _advance(char *lp, char *ep, step_vars_storage *vars) 393 0 jyri { 394 0 jyri char *curlp; 395 0 jyri int c; 396 0 jyri char *bbeg; 397 0 jyri char neg; 398 0 jyri int ct; 399 8 basantk int epint; /* int value of *ep */ 400 0 jyri 401 0 jyri while (1) { 402 0 jyri neg = 0; 403 0 jyri switch (*ep++) { 404 0 jyri 405 0 jyri case CCHR: 406 0 jyri if (*ep++ == *lp++) 407 0 jyri continue; 408 0 jyri return (0); 409 0 jyri 410 0 jyri case CDOT: 411 0 jyri if (*lp++) 412 0 jyri continue; 413 0 jyri return (0); 414 0 jyri 415 0 jyri case CDOL: 416 0 jyri if (*lp == 0) 417 0 jyri continue; 418 0 jyri return (0); 419 0 jyri 420 0 jyri case CCEOF: 421 0 jyri vars->loc2 = lp; 422 0 jyri return (1); 423 0 jyri 424 0 jyri case CXCL: 425 0 jyri c = (unsigned char)*lp++; 426 0 jyri if (ISTHERE(c)) { 427 0 jyri ep += 32; 428 0 jyri continue; 429 0 jyri } 430 0 jyri return (0); 431 0 jyri 432 0 jyri case NCCL: 433 0 jyri neg = 1; 434 0 jyri 435 0 jyri case CCL: 436 0 jyri c = *lp++; 437 0 jyri if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) { 438 0 jyri ep += 16; 439 0 jyri continue; 440 0 jyri } 441 0 jyri return (0); 442 0 jyri 443 0 jyri case CBRA: 444 8 basantk epint = (int) *ep; 445 8 basantk vars->braslist[epint] = lp; 446 8 basantk ep++; 447 0 jyri continue; 448 0 jyri 449 0 jyri case CKET: 450 8 basantk epint = (int) *ep; 451 8 basantk vars->braelist[epint] = lp; 452 8 basantk ep++; 453 0 jyri continue; 454 0 jyri 455 0 jyri case CCHR | RNGE: 456 0 jyri c = *ep++; 457 0 jyri getrnge(ep, vars); 458 0 jyri while (vars->low--) 459 0 jyri if (*lp++ != c) 460 0 jyri return (0); 461 0 jyri curlp = lp; 462 0 jyri while (vars->size--) 463 0 jyri if (*lp++ != c) 464 0 jyri break; 465 0 jyri if (vars->size < 0) 466 0 jyri lp++; 467 0 jyri ep += 2; 468 0 jyri goto star; 469 0 jyri 470 0 jyri case CDOT | RNGE: 471 0 jyri getrnge(ep, vars); 472 0 jyri while (vars->low--) 473 0 jyri if (*lp++ == '\0') 474 0 jyri return (0); 475 0 jyri curlp = lp; 476 0 jyri while (vars->size--) 477 0 jyri if (*lp++ == '\0') 478 0 jyri break; 479 0 jyri if (vars->size < 0) 480 0 jyri lp++; 481 0 jyri ep += 2; 482 0 jyri goto star; 483 0 jyri 484 0 jyri case CXCL | RNGE: 485 0 jyri getrnge(ep + 32, vars); 486 0 jyri while (vars->low--) { 487 0 jyri c = (unsigned char)*lp++; 488 0 jyri if (!ISTHERE(c)) 489 0 jyri return (0); 490 0 jyri } 491 0 jyri curlp = lp; 492 0 jyri while (vars->size--) { 493 0 jyri c = (unsigned char)*lp++; 494 0 jyri if (!ISTHERE(c)) 495 0 jyri break; 496 0 jyri } 497 0 jyri if (vars->size < 0) 498 0 jyri lp++; 499 0 jyri ep += 34; /* 32 + 2 */ 500 0 jyri goto star; 501 0 jyri 502 0 jyri case NCCL | RNGE: 503 0 jyri neg = 1; 504 0 jyri 505 0 jyri case CCL | RNGE: 506 0 jyri getrnge(ep + 16, vars); 507 0 jyri while (vars->low--) { 508 0 jyri c = *lp++; 509 0 jyri if (((c & 0200) || !ISTHERE(c)) ^ neg) 510 0 jyri return (0); 511 0 jyri } 512 0 jyri curlp = lp; 513 0 jyri while (vars->size--) { 514 0 jyri c = *lp++; 515 0 jyri if (((c & 0200) || !ISTHERE(c)) ^ neg) 516 0 jyri break; 517 0 jyri } 518 0 jyri if (vars->size < 0) 519 0 jyri lp++; 520 0 jyri ep += 18; /* 16 + 2 */ 521 0 jyri goto star; 522 0 jyri 523 0 jyri case CBACK: 524 8 basantk epint = (int) *ep; 525 8 basantk bbeg = vars->braslist[epint]; 526 8 basantk ct = vars->braelist[epint] - bbeg; 527 8 basantk ep++; 528 0 jyri 529 0 jyri if (ecmp(bbeg, lp, ct)) { 530 0 jyri lp += ct; 531 0 jyri continue; 532 0 jyri } 533 0 jyri return (0); 534 0 jyri 535 0 jyri case CBACK | STAR: 536 8 basantk epint = (int) *ep; 537 8 basantk bbeg = vars->braslist[epint]; 538 8 basantk ct = vars->braelist[epint] - bbeg; 539 8 basantk ep++; 540 0 jyri curlp = lp; 541 0 jyri while (ecmp(bbeg, lp, ct)) 542 0 jyri lp += ct; 543 0 jyri 544 0 jyri while (lp >= curlp) { 545 0 jyri if (_advance(lp, ep, vars)) 546 0 jyri return (1); 547 0 jyri lp -= ct; 548 0 jyri } 549 0 jyri return (0); 550 0 jyri 551 0 jyri 552 0 jyri case CDOT | STAR: 553 0 jyri curlp = lp; 554 0 jyri while (*lp++); 555 0 jyri goto star; 556 0 jyri 557 0 jyri case CCHR | STAR: 558 0 jyri curlp = lp; 559 0 jyri while (*lp++ == *ep); 560 0 jyri ep++; 561 0 jyri goto star; 562 0 jyri 563 0 jyri case CXCL | STAR: 564 0 jyri curlp = lp; 565 0 jyri do { 566 0 jyri c = (unsigned char)*lp++; 567 0 jyri } while (ISTHERE(c)); 568 0 jyri ep += 32; 569 0 jyri goto star; 570 0 jyri 571 0 jyri case NCCL | STAR: 572 0 jyri neg = 1; 573 0 jyri 574 0 jyri case CCL | STAR: 575 0 jyri curlp = lp; 576 0 jyri do { 577 0 jyri c = *lp++; 578 0 jyri } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg); 579 0 jyri ep += 16; 580 0 jyri goto star; 581 0 jyri 582 0 jyri star: 583 0 jyri do { 584 0 jyri if (--lp == vars->locs) 585 0 jyri break; 586 0 jyri if (_advance(lp, ep, vars)) 587 0 jyri return (1); 588 0 jyri } while (lp > curlp); 589 0 jyri return (0); 590 0 jyri 591 0 jyri } 592 0 jyri } 593 0 jyri } 594 0 jyri 595 0 jyri static void getrnge(char *str, step_vars_storage *vars) 596 0 jyri { 597 0 jyri vars->low = *str++ & 0377; 598 0 jyri vars->size = ((*str & 0377) == 255)? 20000: (*str &0377) - vars->low; 599 0 jyri } 600 0 jyri 601 0 jyri 602