Home | History | Annotate | Download | only in mod_sed
      1 /*
      2  * Copyright (c) 2005, 2008 Sun Microsystems, Inc. All Rights Reserved.
      3  * Use is subject to license terms.
      4  *
      5  *	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
      6  *	  All Rights Reserved
      7  *
      8  * University Copyright- Copyright (c) 1982, 1986, 1988
      9  * The Regents of the University of California
     10  * All Rights Reserved
     11  *
     12  * University Acknowledgment- Portions of this document are derived from
     13  * software developed by the University of California, Berkeley, and its
     14  * contributors.
     15  *
     16  * Licensed under the Apache License, Version 2.0 (the "License");
     17  * you may not use this file except in compliance with the License.
     18  * You may obtain a copy of the License at
     19  *  http://www.apache.org/licenses/LICENSE-2.0.
     20  *
     21  * Unless required by applicable law or agreed to in writing, software
     22  * distributed under the License is distributed on an "AS IS" BASIS,
     23  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     24  * or implied.
     25  * See the License for the specific language governing permissions and
     26  * limitations under the License.
     27  */
     28 
     29 /* Code moved from regexp.h */
     30 
     31 #include "apr.h"
     32 #include "apr_lib.h"
     33 #ifdef APR_HAVE_LIMITS_H
     34 #include <limits.h>
     35 #endif
     36 #if APR_HAVE_STDLIB_H
     37 #include <stdlib.h>
     38 #endif
     39 #include "libsed.h"
     40 #include "regexp.h"
     41 #include "sed.h"
     42 
     43 #define GETC() ((unsigned char)*sp++)
     44 #define PEEKC() ((unsigned char)*sp)
     45 #define UNGETC(c) (--sp)
     46 #define SEDCOMPILE_ERROR(c) { \
     47             regerrno = c; \
     48             goto out; \
     49             }
     50 #define ecmp(s1, s2, n)    (strncmp(s1, s2, n) == 0)
     51 #define uletter(c) (isalpha(c) || c == '_')
     52 
     53 
     54 static unsigned char bittab[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
     55 
     56 static int regerr(sed_commands_t *commands, int err);
     57 static void comperr(sed_commands_t *commands, char *msg);
     58 static void getrnge(char *str, step_vars_storage *vars);
     59 static int _advance(char *, char *, step_vars_storage *);
     60 extern int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars);
     61 
     62 
     63 static void comperr(sed_commands_t *commands, char *msg)
     64 {
     65     command_errf(commands, msg, commands->linebuf);
     66 }
     67 
     68 /*
     69 */
     70 static int regerr(sed_commands_t *commands, int err)
     71 {
     72     switch(err) {
     73     case 0:
     74         /* No error */
     75         break;
     76     case 11:
     77         comperr(commands, "Range endpoint too large: %s");
     78         break;
     79 
     80     case 16:
     81         comperr(commands, "Bad number: %s");
     82         break;
     83 
     84     case 25:
     85         comperr(commands, "``\\digit'' out of range: %s");
     86         break;
     87 
     88     case 36:
     89         comperr(commands, "Illegal or missing delimiter: %s");
     90         break;
     91 
     92     case 41:
     93         comperr(commands, "No remembered search string: %s");
     94         break;
     95 
     96     case 42:
     97         comperr(commands, "\\( \\) imbalance: %s");
     98         break;
     99 
    100     case 43:
    101         comperr(commands, "Too many \\(: %s");
    102         break;
    103 
    104     case 44:
    105         comperr(commands, "More than 2 numbers given in \\{ \\}: %s");
    106         break;
    107 
    108     case 45:
    109         comperr(commands, "} expected after \\: %s");
    110         break;
    111 
    112     case 46:
    113         comperr(commands, "First number exceeds second in \\{ \\}: %s");
    114         break;
    115 
    116     case 49:
    117         comperr(commands, "[ ] imbalance: %s");
    118         break;
    119 
    120     case 50:
    121         comperr(commands, SEDERR_TMMES);
    122         break;
    123 
    124     default:
    125         comperr(commands, "Unknown regexp error code %s\n");
    126         break;
    127     }
    128     return (0);
    129 }
    130 
    131 
    132 char *sed_compile(sed_commands_t *commands, sed_comp_args *compargs,
    133                   char *ep, char *endbuf, int seof)
    134 {
    135     int c;
    136     int eof = seof;
    137     char *lastep;
    138     int cclcnt;
    139     char bracket[NBRA], *bracketp;
    140     int closed;
    141     int neg;
    142     int lc;
    143     int i, cflg;
    144     int iflag; /* used for non-ascii characters in brackets */
    145     int nodelim = 0;
    146     char *sp = commands->cp;
    147     int regerrno = 0;
    148 
    149     lastep = 0;
    150     if ((c = GETC()) == eof || c == '\n') {
    151         if (c == '\n') {
    152             UNGETC(c);
    153             nodelim = 1;
    154         }
    155         commands->cp = sp;
    156         goto out;
    157     }
    158     bracketp = bracket;
    159     compargs->circf = closed = compargs->nbra = 0;
    160     if (c == '^')
    161         compargs->circf++;
    162     else
    163         UNGETC(c);
    164     while (1) {
    165         if (ep >= endbuf)
    166             SEDCOMPILE_ERROR(50);
    167         c = GETC();
    168         if (c != '*' && ((c != '\\') || (PEEKC() != '{')))
    169             lastep = ep;
    170         if (c == eof) {
    171             *ep++ = CCEOF;
    172             if (bracketp != bracket)
    173                 SEDCOMPILE_ERROR(42);
    174             commands->cp = sp;
    175             goto out;
    176         }
    177         switch (c) {
    178 
    179         case '.':
    180             *ep++ = CDOT;
    181             continue;
    182 
    183         case '\n':
    184             SEDCOMPILE_ERROR(36);
    185             commands->cp = sp;
    186             goto out;
    187         case '*':
    188             if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
    189                 goto defchar;
    190             *lastep |= STAR;
    191             continue;
    192 
    193         case '$':
    194             if (PEEKC() != eof && PEEKC() != '\n')
    195                 goto defchar;
    196             *ep++ = CDOL;
    197             continue;
    198 
    199         case '[':
    200             if (&ep[17] >= endbuf)
    201                 SEDCOMPILE_ERROR(50);
    202 
    203             *ep++ = CCL;
    204             lc = 0;
    205             for (i = 0; i < 16; i++)
    206                 ep[i] = 0;
    207 
    208             neg = 0;
    209             if ((c = GETC()) == '^') {
    210                 neg = 1;
    211                 c = GETC();
    212             }
    213             iflag = 1;
    214             do {
    215                 c &= 0377;
    216                 if (c == '\0' || c == '\n')
    217                     SEDCOMPILE_ERROR(49);
    218                 if ((c & 0200) && iflag) {
    219                     iflag = 0;
    220                     if (&ep[32] >= endbuf)
    221                         SEDCOMPILE_ERROR(50);
    222                     ep[-1] = CXCL;
    223                     for (i = 16; i < 32; i++)
    224                         ep[i] = 0;
    225                 }
    226                 if (c == '-' && lc != 0) {
    227                     if ((c = GETC()) == ']') {
    228                         PLACE('-');
    229                         break;
    230                     }
    231                     if ((c & 0200) && iflag) {
    232                         iflag = 0;
    233                         if (&ep[32] >= endbuf)
    234                             SEDCOMPILE_ERROR(50);
    235                         ep[-1] = CXCL;
    236                         for (i = 16; i < 32; i++)
    237                             ep[i] = 0;
    238                     }
    239                     while (lc < c) {
    240                         PLACE(lc);
    241                         lc++;
    242                     }
    243                 }
    244                 lc = c;
    245                 PLACE(c);
    246             } while ((c = GETC()) != ']');
    247 
    248             if (iflag)
    249                 iflag = 16;
    250             else
    251                 iflag = 32;
    252 
    253             if (neg) {
    254                 if (iflag == 32) {
    255                     for (cclcnt = 0; cclcnt < iflag;
    256                         cclcnt++)
    257                         ep[cclcnt] ^= 0377;
    258                     ep[0] &= 0376;
    259                 } else {
    260                     ep[-1] = NCCL;
    261                     /* make nulls match so test fails */
    262                     ep[0] |= 01;
    263                 }
    264             }
    265 
    266             ep += iflag;
    267 
    268             continue;
    269 
    270         case '\\':
    271             switch (c = GETC()) {
    272 
    273             case '(':
    274                 if (compargs->nbra >= NBRA)
    275                     SEDCOMPILE_ERROR(43);
    276                 *bracketp++ = compargs->nbra;
    277                 *ep++ = CBRA;
    278                 *ep++ = compargs->nbra++;
    279                 continue;
    280 
    281             case ')':
    282                 if (bracketp <= bracket)
    283                     SEDCOMPILE_ERROR(42);
    284                 *ep++ = CKET;
    285                 *ep++ = *--bracketp;
    286                 closed++;
    287                 continue;
    288 
    289             case '{':
    290                 if (lastep == (char *) 0)
    291                     goto defchar;
    292                 *lastep |= RNGE;
    293                 cflg = 0;
    294             nlim:
    295                 c = GETC();
    296                 i = 0;
    297                 do {
    298                     if ('0' <= c && c <= '9')
    299                         i = 10 * i + c - '0';
    300                     else
    301                         SEDCOMPILE_ERROR(16);
    302                 } while (((c = GETC()) != '\\') && (c != ','));
    303                 if (i >= 255)
    304                     SEDCOMPILE_ERROR(11);
    305                 *ep++ = i;
    306                 if (c == ',') {
    307                     if (cflg++)
    308                         SEDCOMPILE_ERROR(44);
    309                     if ((c = GETC()) == '\\')
    310                         *ep++ = (char) 255;
    311                     else {
    312                         UNGETC(c);
    313                         goto nlim;
    314                         /* get 2'nd number */
    315                     }
    316                 }
    317                 if (GETC() != '}')
    318                     SEDCOMPILE_ERROR(45);
    319                 if (!cflg)    /* one number */
    320                     *ep++ = i;
    321                 else if ((ep[-1] & 0377) < (ep[-2] & 0377))
    322                     SEDCOMPILE_ERROR(46);
    323                 continue;
    324 
    325             case '\n':
    326                 SEDCOMPILE_ERROR(36);
    327 
    328             case 'n':
    329                 c = '\n';
    330                 goto defchar;
    331 
    332             default:
    333                 if (c >= '1' && c <= '9') {
    334                     if ((c -= '1') >= closed)
    335                         SEDCOMPILE_ERROR(25);
    336                     *ep++ = CBACK;
    337                     *ep++ = c;
    338                     continue;
    339                 }
    340             }
    341     /* Drop through to default to use \ to turn off special chars */
    342 
    343         defchar:
    344         default:
    345             lastep = ep;
    346             *ep++ = CCHR;
    347             *ep++ = c;
    348         }
    349     }
    350 out:
    351     if (regerrno) {
    352         regerr(commands, regerrno);
    353         return (char*) NULL;
    354     }
    355     /* XXX : Basant : what extra */
    356     /* int reglength = (int)(ep - expbuf); */
    357     return ep;
    358 }
    359 
    360 int sed_step(char *p1, char *p2, int circf, step_vars_storage *vars)
    361 {
    362     int c;
    363 
    364 
    365     if (circf) {
    366         vars->loc1 = p1;
    367         return (_advance(p1, p2, vars));
    368     }
    369     /* fast check for first character */
    370     if (*p2 == CCHR) {
    371         c = p2[1];
    372         do {
    373             if (*p1 != c)
    374                 continue;
    375             if (_advance(p1, p2, vars)) {
    376                 vars->loc1 = p1;
    377                 return (1);
    378             }
    379         } while (*p1++);
    380         return (0);
    381     }
    382         /* regular algorithm */
    383     do {
    384         if (_advance(p1, p2, vars)) {
    385             vars->loc1 = p1;
    386             return (1);
    387         }
    388     } while (*p1++);
    389     return (0);
    390 }
    391 
    392 static int _advance(char *lp, char *ep, step_vars_storage *vars)
    393 {
    394     char *curlp;
    395     int c;
    396     char *bbeg;
    397     char neg;
    398     int ct;
    399     int epint; /* int value of *ep */
    400 
    401     while (1) {
    402         neg = 0;
    403         switch (*ep++) {
    404 
    405         case CCHR:
    406             if (*ep++ == *lp++)
    407                 continue;
    408             return (0);
    409 
    410         case CDOT:
    411             if (*lp++)
    412                 continue;
    413             return (0);
    414 
    415         case CDOL:
    416             if (*lp == 0)
    417                 continue;
    418             return (0);
    419 
    420         case CCEOF:
    421             vars->loc2 = lp;
    422             return (1);
    423 
    424         case CXCL:
    425             c = (unsigned char)*lp++;
    426             if (ISTHERE(c)) {
    427                 ep += 32;
    428                 continue;
    429             }
    430             return (0);
    431 
    432         case NCCL:
    433             neg = 1;
    434 
    435         case CCL:
    436             c = *lp++;
    437             if (((c & 0200) == 0 && ISTHERE(c)) ^ neg) {
    438                 ep += 16;
    439                 continue;
    440             }
    441             return (0);
    442 
    443         case CBRA:
    444             epint = (int) *ep;
    445             vars->braslist[epint] = lp;
    446             ep++;
    447             continue;
    448 
    449         case CKET:
    450             epint = (int) *ep;
    451             vars->braelist[epint] = lp;
    452             ep++;
    453             continue;
    454 
    455         case CCHR | RNGE:
    456             c = *ep++;
    457             getrnge(ep, vars);
    458             while (vars->low--)
    459                 if (*lp++ != c)
    460                     return (0);
    461             curlp = lp;
    462             while (vars->size--)
    463                 if (*lp++ != c)
    464                     break;
    465             if (vars->size < 0)
    466                 lp++;
    467             ep += 2;
    468             goto star;
    469 
    470         case CDOT | RNGE:
    471             getrnge(ep, vars);
    472             while (vars->low--)
    473                 if (*lp++ == '\0')
    474                     return (0);
    475             curlp = lp;
    476             while (vars->size--)
    477                 if (*lp++ == '\0')
    478                     break;
    479             if (vars->size < 0)
    480                 lp++;
    481             ep += 2;
    482             goto star;
    483 
    484         case CXCL | RNGE:
    485             getrnge(ep + 32, vars);
    486             while (vars->low--) {
    487                 c = (unsigned char)*lp++;
    488                 if (!ISTHERE(c))
    489                     return (0);
    490             }
    491             curlp = lp;
    492             while (vars->size--) {
    493                 c = (unsigned char)*lp++;
    494                 if (!ISTHERE(c))
    495                     break;
    496             }
    497             if (vars->size < 0)
    498                 lp++;
    499             ep += 34;        /* 32 + 2 */
    500             goto star;
    501 
    502         case NCCL | RNGE:
    503             neg = 1;
    504 
    505         case CCL | RNGE:
    506             getrnge(ep + 16, vars);
    507             while (vars->low--) {
    508                 c = *lp++;
    509                 if (((c & 0200) || !ISTHERE(c)) ^ neg)
    510                     return (0);
    511             }
    512             curlp = lp;
    513             while (vars->size--) {
    514                 c = *lp++;
    515                 if (((c & 0200) || !ISTHERE(c)) ^ neg)
    516                     break;
    517             }
    518             if (vars->size < 0)
    519                 lp++;
    520             ep += 18;         /* 16 + 2 */
    521             goto star;
    522 
    523         case CBACK:
    524             epint = (int) *ep;
    525             bbeg = vars->braslist[epint];
    526             ct = vars->braelist[epint] - bbeg;
    527             ep++;
    528 
    529             if (ecmp(bbeg, lp, ct)) {
    530                 lp += ct;
    531                 continue;
    532             }
    533             return (0);
    534 
    535         case CBACK | STAR:
    536             epint = (int) *ep;
    537             bbeg = vars->braslist[epint];
    538             ct = vars->braelist[epint] - bbeg;
    539             ep++;
    540             curlp = lp;
    541             while (ecmp(bbeg, lp, ct))
    542                 lp += ct;
    543 
    544             while (lp >= curlp) {
    545                 if (_advance(lp, ep, vars))
    546                     return (1);
    547                 lp -= ct;
    548             }
    549             return (0);
    550 
    551 
    552         case CDOT | STAR:
    553             curlp = lp;
    554             while (*lp++);
    555             goto star;
    556 
    557         case CCHR | STAR:
    558             curlp = lp;
    559             while (*lp++ == *ep);
    560             ep++;
    561             goto star;
    562 
    563         case CXCL | STAR:
    564             curlp = lp;
    565             do {
    566                 c = (unsigned char)*lp++;
    567             } while (ISTHERE(c));
    568             ep += 32;
    569             goto star;
    570 
    571         case NCCL | STAR:
    572             neg = 1;
    573 
    574         case CCL | STAR:
    575             curlp = lp;
    576             do {
    577                 c = *lp++;
    578             } while (((c & 0200) == 0 && ISTHERE(c)) ^ neg);
    579             ep += 16;
    580             goto star;
    581 
    582         star:
    583             do {
    584                 if (--lp == vars->locs)
    585                     break;
    586                 if (_advance(lp, ep, vars))
    587                     return (1);
    588             } while (lp > curlp);
    589             return (0);
    590 
    591         }
    592     }
    593 }
    594 
    595 static void getrnge(char *str, step_vars_storage *vars)
    596 {
    597     vars->low = *str++ & 0377;
    598     vars->size = ((*str & 0377) == 255)? 20000: (*str &0377) - vars->low;
    599 }
    600 
    601 
    602