Home | History | Annotate | Download | only in awk
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 
     23 /*
     24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
     25  * Use is subject to license terms.
     26  */
     27 
     28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     29 /*	  All Rights Reserved  	*/
     30 
     31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     32 
     33 #define	DEBUG
     34 
     35 #include "awk.h"
     36 #include "y.tab.h"
     37 
     38 #define	HAT	(NCHARS-1)	/* matches ^ in regular expr */
     39 				/* NCHARS is 2**n */
     40 #define	MAXLIN (3 * LINE_MAX)
     41 
     42 #define	type(v)		(v)->nobj
     43 #define	left(v)		(v)->narg[0]
     44 #define	right(v)	(v)->narg[1]
     45 #define	parent(v)	(v)->nnext
     46 
     47 #define	LEAF	case CCL: case NCCL: case CHAR: case DOT: case FINAL: case ALL:
     48 #define	UNARY	case STAR: case PLUS: case QUEST:
     49 
     50 /*
     51  * encoding in tree Nodes:
     52  *	leaf (CCL, NCCL, CHAR, DOT, FINAL, ALL):
     53  *		left is index, right contains value or pointer to value
     54  *	unary (STAR, PLUS, QUEST): left is child, right is null
     55  *	binary (CAT, OR): left and right are children
     56  *	parent contains pointer to parent
     57  */
     58 
     59 int	setvec[MAXLIN];
     60 int	tmpset[MAXLIN];
     61 Node	*point[MAXLIN];
     62 
     63 int	rtok;		/* next token in current re */
     64 int	rlxval;
     65 uchar	*rlxstr;
     66 uchar	*prestr;	/* current position in current re */
     67 uchar	*lastre;	/* origin of last re */
     68 
     69 static	int setcnt;
     70 static	int poscnt;
     71 
     72 uchar	*patbeg;
     73 int	patlen;
     74 
     75 #define	NFA	20	/* cache this many dynamic fa's */
     76 fa	*fatab[NFA];
     77 int	nfatab	= 0;	/* entries in fatab */
     78 
     79 static fa	*mkdfa(uchar *, int);
     80 static int	makeinit(fa *, int);
     81 static void	penter(Node *);
     82 static void	freetr(Node *);
     83 static void	overflo(char *);
     84 static void	cfoll(fa *, Node *);
     85 static void	follow(Node *);
     86 static Node	*reparse(uchar *);
     87 static int	relex(void);
     88 static void	freefa(fa *);
     89 static int	cgoto(fa *, int, int);
     90 
     91 fa *
     92 makedfa(uchar *s, int anchor)	/* returns dfa for reg expr s */
     93 {
     94 	int i, use, nuse;
     95 	fa *pfa;
     96 
     97 	if (compile_time)	/* a constant for sure */
     98 		return (mkdfa(s, anchor));
     99 	for (i = 0; i < nfatab; i++) {	/* is it there already? */
    100 		if (fatab[i]->anchor == anchor &&
    101 		    strcmp((char *)fatab[i]->restr, (char *)s) == 0) {
    102 			fatab[i]->use++;
    103 			return (fatab[i]);
    104 		}
    105 	}
    106 	pfa = mkdfa(s, anchor);
    107 	if (nfatab < NFA) {	/* room for another */
    108 		fatab[nfatab] = pfa;
    109 		fatab[nfatab]->use = 1;
    110 		nfatab++;
    111 		return (pfa);
    112 	}
    113 	use = fatab[0]->use;	/* replace least-recently used */
    114 	nuse = 0;
    115 	for (i = 1; i < nfatab; i++)
    116 		if (fatab[i]->use < use) {
    117 			use = fatab[i]->use;
    118 			nuse = i;
    119 		}
    120 	freefa(fatab[nuse]);
    121 	fatab[nuse] = pfa;
    122 	pfa->use = 1;
    123 	return (pfa);
    124 }
    125 
    126 fa *
    127 mkdfa(uchar *s, int anchor)	/* does the real work of making a dfa */
    128 	/* anchor = 1 for anchored matches, else 0 */
    129 {
    130 	Node *p, *p1;
    131 	fa *f;
    132 
    133 	p = reparse(s);
    134 	p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p);
    135 		/* put ALL STAR in front of reg.  exp. */
    136 	p1 = op2(CAT, p1, op2(FINAL, NIL, NIL));
    137 		/* put FINAL after reg.  exp. */
    138 
    139 	poscnt = 0;
    140 	penter(p1);	/* enter parent pointers and leaf indices */
    141 	if ((f = (fa *)calloc(1, sizeof (fa) + poscnt * sizeof (rrow))) == NULL)
    142 		overflo("no room for fa");
    143 	/* penter has computed number of positions in re */
    144 	f->accept = poscnt-1;
    145 	cfoll(f, p1);	/* set up follow sets */
    146 	freetr(p1);
    147 	if ((f->posns[0] =
    148 	    (int *)calloc(1, *(f->re[0].lfollow) * sizeof (int))) == NULL) {
    149 			overflo("out of space in makedfa");
    150 	}
    151 	if ((f->posns[1] = (int *)calloc(1, sizeof (int))) == NULL)
    152 		overflo("out of space in makedfa");
    153 	*f->posns[1] = 0;
    154 	f->initstat = makeinit(f, anchor);
    155 	f->anchor = anchor;
    156 	f->restr = tostring(s);
    157 	return (f);
    158 }
    159 
    160 static int
    161 makeinit(fa *f, int anchor)
    162 {
    163 	register int i, k;
    164 
    165 	f->curstat = 2;
    166 	f->out[2] = 0;
    167 	f->reset = 0;
    168 	k = *(f->re[0].lfollow);
    169 	xfree(f->posns[2]);
    170 	if ((f->posns[2] = (int *)calloc(1, (k+1) * sizeof (int))) == NULL)
    171 		overflo("out of space in makeinit");
    172 	for (i = 0; i <= k; i++) {
    173 		(f->posns[2])[i] = (f->re[0].lfollow)[i];
    174 	}
    175 	if ((f->posns[2])[1] == f->accept)
    176 		f->out[2] = 1;
    177 	for (i = 0; i < NCHARS; i++)
    178 		f->gototab[2][i] = 0;
    179 	f->curstat = cgoto(f, 2, HAT);
    180 	if (anchor) {
    181 		*f->posns[2] = k-1;	/* leave out position 0 */
    182 		for (i = 0; i < k; i++) {
    183 			(f->posns[0])[i] = (f->posns[2])[i];
    184 		}
    185 
    186 		f->out[0] = f->out[2];
    187 		if (f->curstat != 2)
    188 			--(*f->posns[f->curstat]);
    189 	}
    190 	return (f->curstat);
    191 }
    192 
    193 void
    194 penter(Node *p)	/* set up parent pointers and leaf indices */
    195 {
    196 	switch (type(p)) {
    197 	LEAF
    198 		left(p) = (Node *) poscnt;
    199 		point[poscnt++] = p;
    200 		break;
    201 	UNARY
    202 		penter(left(p));
    203 		parent(left(p)) = p;
    204 		break;
    205 	case CAT:
    206 	case OR:
    207 		penter(left(p));
    208 		penter(right(p));
    209 		parent(left(p)) = p;
    210 		parent(right(p)) = p;
    211 		break;
    212 	default:
    213 		ERROR "unknown type %d in penter", type(p) FATAL;
    214 		break;
    215 	}
    216 }
    217 
    218 static void
    219 freetr(Node *p)	/* free parse tree */
    220 {
    221 	switch (type(p)) {
    222 	LEAF
    223 		xfree(p);
    224 		break;
    225 	UNARY
    226 		freetr(left(p));
    227 		xfree(p);
    228 		break;
    229 	case CAT:
    230 	case OR:
    231 		freetr(left(p));
    232 		freetr(right(p));
    233 		xfree(p);
    234 		break;
    235 	default:
    236 		ERROR "unknown type %d in freetr", type(p) FATAL;
    237 		break;
    238 	}
    239 }
    240 
    241 uchar *
    242 cclenter(uchar *p)
    243 {
    244 	register int i, c;
    245 	uchar *op, *chars, *ret;
    246 	size_t	bsize;
    247 
    248 	init_buf(&chars, &bsize, LINE_INCR);
    249 	op = p;
    250 	i = 0;
    251 	while ((c = *p++) != 0) {
    252 		if (c == '\\') {
    253 			if ((c = *p++) == 't')
    254 				c = '\t';
    255 			else if (c == 'n')
    256 				c = '\n';
    257 			else if (c == 'f')
    258 				c = '\f';
    259 			else if (c == 'r')
    260 				c = '\r';
    261 			else if (c == 'b')
    262 				c = '\b';
    263 			else if (c == '\\')
    264 				c = '\\';
    265 			else if (isdigit(c)) {
    266 				int n = c - '0';
    267 				if (isdigit(*p)) {
    268 					n = 8 * n + *p++ - '0';
    269 					if (isdigit(*p))
    270 						n = 8 * n + *p++ - '0';
    271 				}
    272 				c = n;
    273 			} /* else */
    274 				/* c = c; */
    275 		} else if (c == '-' && i > 0 && chars[i-1] != 0) {
    276 			if (*p != 0) {
    277 				c = chars[i-1];
    278 				while ((uchar)c < *p) {	/* fails if *p is \\ */
    279 					expand_buf(&chars, &bsize, i);
    280 					chars[i++] = ++c;
    281 				}
    282 				p++;
    283 				continue;
    284 			}
    285 		}
    286 		expand_buf(&chars, &bsize, i);
    287 		chars[i++] = c;
    288 	}
    289 	chars[i++] = '\0';
    290 	dprintf(("cclenter: in = |%s|, out = |%s|\n", op, chars));
    291 	xfree(op);
    292 	ret = tostring(chars);
    293 	free(chars);
    294 	return (ret);
    295 }
    296 
    297 static void
    298 overflo(char *s)
    299 {
    300 	ERROR "regular expression too big: %s", gettext((char *)s) FATAL;
    301 }
    302 
    303 /* enter follow set of each leaf of vertex v into lfollow[leaf] */
    304 static void
    305 cfoll(fa *f, Node *v)
    306 {
    307 	register int i;
    308 	register int *p;
    309 
    310 	switch (type(v)) {
    311 	LEAF
    312 		f->re[(int)left(v)].ltype = type(v);
    313 		f->re[(int)left(v)].lval = (int)right(v);
    314 		for (i = 0; i <= f->accept; i++)
    315 			setvec[i] = 0;
    316 		setcnt = 0;
    317 		follow(v);	/* computes setvec and setcnt */
    318 		if ((p = (int *)calloc(1, (setcnt+1) * sizeof (int))) == NULL)
    319 			overflo("follow set overflow");
    320 		f->re[(int)left(v)].lfollow = p;
    321 		*p = setcnt;
    322 		for (i = f->accept; i >= 0; i--) {
    323 			if (setvec[i] == 1)
    324 				*++p = i;
    325 		}
    326 		break;
    327 	UNARY
    328 		cfoll(f, left(v));
    329 		break;
    330 	case CAT:
    331 	case OR:
    332 		cfoll(f, left(v));
    333 		cfoll(f, right(v));
    334 		break;
    335 	default:
    336 		ERROR "unknown type %d in cfoll", type(v) FATAL;
    337 	}
    338 }
    339 
    340 /*
    341  * collects initially active leaves of p into setvec
    342  * returns 0 or 1 depending on whether p matches empty string
    343  */
    344 static int
    345 first(Node *p)
    346 {
    347 	register int b;
    348 
    349 	switch (type(p)) {
    350 	LEAF
    351 		if (setvec[(int)left(p)] != 1) {
    352 			setvec[(int)left(p)] = 1;
    353 			setcnt++;
    354 		}
    355 		if (type(p) == CCL && (*(uchar *)right(p)) == '\0')
    356 			return (0);		/* empty CCL */
    357 		else
    358 			return (1);
    359 	case PLUS:
    360 		if (first(left(p)) == 0)
    361 			return (0);
    362 		return (1);
    363 	case STAR:
    364 	case QUEST:
    365 		(void) first(left(p));
    366 		return (0);
    367 	case CAT:
    368 		if (first(left(p)) == 0 && first(right(p)) == 0)
    369 			return (0);
    370 		return (1);
    371 	case OR:
    372 		b = first(right(p));
    373 		if (first(left(p)) == 0 || b == 0)
    374 			return (0);
    375 		return (1);
    376 	}
    377 	ERROR "unknown type %d in first", type(p) FATAL;
    378 	return (-1);
    379 }
    380 
    381 /* collects leaves that can follow v into setvec */
    382 static void
    383 follow(Node *v)
    384 {
    385 	Node *p;
    386 
    387 	if (type(v) == FINAL)
    388 		return;
    389 	p = parent(v);
    390 	switch (type(p)) {
    391 	case STAR:
    392 	case PLUS:
    393 		(void) first(v);
    394 		follow(p);
    395 		return;
    396 
    397 	case OR:
    398 	case QUEST:
    399 		follow(p);
    400 		return;
    401 
    402 	case CAT:
    403 		if (v == left(p)) {	/* v is left child of p */
    404 			if (first(right(p)) == 0) {
    405 				follow(p);
    406 				return;
    407 			}
    408 		} else		/* v is right child */
    409 			follow(p);
    410 		return;
    411 	default:
    412 		ERROR "unknown type %d in follow", type(p) FATAL;
    413 		break;
    414 	}
    415 }
    416 
    417 static int
    418 member(uchar c, uchar *s)	/* is c in s? */
    419 {
    420 	while (*s)
    421 		if (c == *s++)
    422 			return (1);
    423 	return (0);
    424 }
    425 
    426 
    427 int
    428 match(fa *f, uchar *p)
    429 {
    430 	register int s, ns;
    431 
    432 	s = f->reset ? makeinit(f, 0) : f->initstat;
    433 	if (f->out[s])
    434 		return (1);
    435 	do {
    436 		if ((ns = f->gototab[s][*p]) != 0)
    437 			s = ns;
    438 		else
    439 			s = cgoto(f, s, *p);
    440 		if (f->out[s])
    441 			return (1);
    442 	} while (*p++ != 0);
    443 	return (0);
    444 }
    445 
    446 int
    447 pmatch(fa *f, uchar *p)
    448 {
    449 	register int s, ns;
    450 	register uchar *q;
    451 	int i, k;
    452 
    453 	if (f->reset) {
    454 		f->initstat = s = makeinit(f, 1);
    455 	} else {
    456 		s = f->initstat;
    457 	}
    458 	patbeg = p;
    459 	patlen = -1;
    460 	do {
    461 		q = p;
    462 		do {
    463 			if (f->out[s])		/* final state */
    464 				patlen = q-p;
    465 			if ((ns = f->gototab[s][*q]) != 0)
    466 				s = ns;
    467 			else
    468 				s = cgoto(f, s, *q);
    469 			if (s == 1) {	/* no transition */
    470 				if (patlen >= 0) {
    471 					patbeg = p;
    472 					return (1);
    473 				} else
    474 					goto nextin;	/* no match */
    475 			}
    476 		} while (*q++ != 0);
    477 		if (f->out[s])
    478 			patlen = q - p - 1;	/* don't count $ */
    479 		if (patlen >= 0) {
    480 			patbeg = p;
    481 			return (1);
    482 		}
    483 	nextin:
    484 		s = 2;
    485 		if (f->reset) {
    486 			for (i = 2; i <= f->curstat; i++)
    487 				xfree(f->posns[i]);
    488 			k = *f->posns[0];
    489 			if ((f->posns[2] =
    490 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
    491 				overflo("out of space in pmatch");
    492 			}
    493 			for (i = 0; i <= k; i++)
    494 				(f->posns[2])[i] = (f->posns[0])[i];
    495 			f->initstat = f->curstat = 2;
    496 			f->out[2] = f->out[0];
    497 			for (i = 0; i < NCHARS; i++)
    498 				f->gototab[2][i] = 0;
    499 		}
    500 	} while (*p++ != 0);
    501 	return (0);
    502 }
    503 
    504 int
    505 nematch(fa *f, uchar *p)
    506 {
    507 	register int s, ns;
    508 	register uchar *q;
    509 	int i, k;
    510 
    511 	if (f->reset) {
    512 		f->initstat = s = makeinit(f, 1);
    513 	} else {
    514 		s = f->initstat;
    515 	}
    516 	patlen = -1;
    517 	while (*p) {
    518 		q = p;
    519 		do {
    520 			if (f->out[s])		/* final state */
    521 				patlen = q-p;
    522 			if ((ns = f->gototab[s][*q]) != 0)
    523 				s = ns;
    524 			else
    525 				s = cgoto(f, s, *q);
    526 			if (s == 1) {	/* no transition */
    527 				if (patlen > 0) {
    528 					patbeg = p;
    529 					return (1);
    530 				} else
    531 					goto nnextin;	/* no nonempty match */
    532 			}
    533 		} while (*q++ != 0);
    534 		if (f->out[s])
    535 			patlen = q - p - 1;	/* don't count $ */
    536 		if (patlen > 0) {
    537 			patbeg = p;
    538 			return (1);
    539 		}
    540 	nnextin:
    541 		s = 2;
    542 		if (f->reset) {
    543 			for (i = 2; i <= f->curstat; i++)
    544 				xfree(f->posns[i]);
    545 			k = *f->posns[0];
    546 			if ((f->posns[2] =
    547 			    (int *)calloc(1, (k + 1) * sizeof (int))) == NULL) {
    548 				overflo("out of state space");
    549 			}
    550 			for (i = 0; i <= k; i++)
    551 				(f->posns[2])[i] = (f->posns[0])[i];
    552 			f->initstat = f->curstat = 2;
    553 			f->out[2] = f->out[0];
    554 			for (i = 0; i < NCHARS; i++)
    555 				f->gototab[2][i] = 0;
    556 		}
    557 		p++;
    558 	}
    559 	return (0);
    560 }
    561 
    562 static Node *regexp(void), *primary(void), *concat(Node *);
    563 static Node *alt(Node *), *unary(Node *);
    564 
    565 static Node *
    566 reparse(uchar *p)
    567 {
    568 	/* parses regular expression pointed to by p */
    569 	/* uses relex() to scan regular expression */
    570 	Node *np;
    571 
    572 	dprintf(("reparse <%s>\n", p));
    573 	lastre = prestr = p;	/* prestr points to string to be parsed */
    574 	rtok = relex();
    575 	if (rtok == '\0')
    576 		ERROR "empty regular expression" FATAL;
    577 	np = regexp();
    578 	if (rtok == '\0') {
    579 		return (np);
    580 	} else {
    581 		ERROR "syntax error in regular expression %s at %s",
    582 		    lastre, prestr FATAL;
    583 	}
    584 	/*NOTREACHED*/
    585 	return (NULL);
    586 }
    587 
    588 static Node *
    589 regexp(void)
    590 {
    591 	return (alt(concat(primary())));
    592 }
    593 
    594 static Node *
    595 primary(void)
    596 {
    597 	Node *np;
    598 
    599 	switch (rtok) {
    600 	case CHAR:
    601 		np = op2(CHAR, NIL, (Node *)rlxval);
    602 		rtok = relex();
    603 		return (unary(np));
    604 	case ALL:
    605 		rtok = relex();
    606 		return (unary(op2(ALL, NIL, NIL)));
    607 	case DOT:
    608 		rtok = relex();
    609 		return (unary(op2(DOT, NIL, NIL)));
    610 	case CCL:
    611 		/*LINTED align*/
    612 		np = op2(CCL, NIL, (Node *)cclenter(rlxstr));
    613 		rtok = relex();
    614 		return (unary(np));
    615 	case NCCL:
    616 		/*LINTED align*/
    617 		np = op2(NCCL, NIL, (Node *)cclenter(rlxstr));
    618 		rtok = relex();
    619 		return (unary(np));
    620 	case '^':
    621 		rtok = relex();
    622 		return (unary(op2(CHAR, NIL, (Node *)HAT)));
    623 	case '$':
    624 		rtok = relex();
    625 		return (unary(op2(CHAR, NIL, NIL)));
    626 	case '(':
    627 		rtok = relex();
    628 		if (rtok == ')') {	/* special pleading for () */
    629 			rtok = relex();
    630 			return (unary(op2(CCL, NIL,
    631 			    /*LINTED align*/
    632 			    (Node *)tostring((uchar *)""))));
    633 		}
    634 		np = regexp();
    635 		if (rtok == ')') {
    636 			rtok = relex();
    637 			return (unary(np));
    638 		} else {
    639 			ERROR "syntax error in regular expression %s at %s",
    640 			    lastre, prestr FATAL;
    641 		}
    642 	default:
    643 		ERROR "illegal primary in regular expression %s at %s",
    644 		    lastre, prestr FATAL;
    645 	}
    646 	/*NOTREACHED*/
    647 	return (NULL);
    648 }
    649 
    650 static Node *
    651 concat(Node *np)
    652 {
    653 	switch (rtok) {
    654 	case CHAR: case DOT: case ALL: case CCL: case NCCL: case '$': case '(':
    655 		return (concat(op2(CAT, np, primary())));
    656 	default:
    657 		return (np);
    658 	}
    659 }
    660 
    661 static Node *
    662 alt(Node *np)
    663 {
    664 	if (rtok == OR) {
    665 		rtok = relex();
    666 		return (alt(op2(OR, np, concat(primary()))));
    667 	}
    668 	return (np);
    669 }
    670 
    671 static Node *
    672 unary(Node *np)
    673 {
    674 	switch (rtok) {
    675 	case STAR:
    676 		rtok = relex();
    677 		return (unary(op2(STAR, np, NIL)));
    678 	case PLUS:
    679 		rtok = relex();
    680 		return (unary(op2(PLUS, np, NIL)));
    681 	case QUEST:
    682 		rtok = relex();
    683 		return (unary(op2(QUEST, np, NIL)));
    684 	default:
    685 		return (np);
    686 	}
    687 }
    688 
    689 static int
    690 relex(void)		/* lexical analyzer for reparse */
    691 {
    692 	register int c;
    693 	uchar *cbuf;
    694 	int clen, cflag;
    695 
    696 	switch (c = *prestr++) {
    697 	case '|': return OR;
    698 	case '*': return STAR;
    699 	case '+': return PLUS;
    700 	case '?': return QUEST;
    701 	case '.': return DOT;
    702 	case '\0': prestr--; return '\0';
    703 	case '^':
    704 	case '$':
    705 	case '(':
    706 	case ')':
    707 		return (c);
    708 	case '\\':
    709 		if ((c = *prestr++) == 't')
    710 			c = '\t';
    711 		else if (c == 'n')
    712 			c = '\n';
    713 		else if (c == 'f')
    714 			c = '\f';
    715 		else if (c == 'r')
    716 			c = '\r';
    717 		else if (c == 'b')
    718 			c = '\b';
    719 		else if (c == '\\')
    720 			c = '\\';
    721 		else if (isdigit(c)) {
    722 			int n = c - '0';
    723 			if (isdigit(*prestr)) {
    724 				n = 8 * n + *prestr++ - '0';
    725 				if (isdigit(*prestr))
    726 					n = 8 * n + *prestr++ - '0';
    727 			}
    728 			c = n;
    729 		} /* else it's now in c */
    730 		rlxval = c;
    731 		return (CHAR);
    732 	default:
    733 		rlxval = c;
    734 		return (CHAR);
    735 	case '[':
    736 		clen = 0;
    737 		if (*prestr == '^') {
    738 			cflag = 1;
    739 			prestr++;
    740 		} else
    741 			cflag = 0;
    742 		init_buf(&cbuf, NULL, strlen((char *)prestr) * 2 + 1);
    743 		for (;;) {
    744 			if ((c = *prestr++) == '\\') {
    745 				cbuf[clen++] = '\\';
    746 				if ((c = *prestr++) == '\0') {
    747 					ERROR
    748 			"nonterminated character class %s", lastre FATAL;
    749 				}
    750 				cbuf[clen++] = c;
    751 			} else if (c == ']') {
    752 				cbuf[clen] = 0;
    753 				rlxstr = tostring(cbuf);
    754 				free(cbuf);
    755 				if (cflag == 0)
    756 					return (CCL);
    757 				else
    758 					return (NCCL);
    759 			} else if (c == '\n') {
    760 				ERROR "newline in character class %s...",
    761 				    lastre FATAL;
    762 			} else if (c == '\0') {
    763 				ERROR "nonterminated character class %s",
    764 				    lastre FATAL;
    765 			} else
    766 				cbuf[clen++] = c;
    767 		}
    768 		/*NOTREACHED*/
    769 	}
    770 	return (0);
    771 }
    772 
    773 static int
    774 cgoto(fa *f, int s, int c)
    775 {
    776 	register int i, j, k;
    777 	register int *p, *q;
    778 
    779 	for (i = 0; i <= f->accept; i++)
    780 		setvec[i] = 0;
    781 	setcnt = 0;
    782 	/* compute positions of gototab[s,c] into setvec */
    783 	p = f->posns[s];
    784 	for (i = 1; i <= *p; i++) {
    785 		if ((k = f->re[p[i]].ltype) != FINAL) {
    786 			if (k == CHAR && c == f->re[p[i]].lval ||
    787 			    k == DOT && c != 0 && c != HAT ||
    788 			    k == ALL && c != 0 ||
    789 			    k == CCL &&
    790 			    member(c, (uchar *)f->re[p[i]].lval) ||
    791 			    k == NCCL &&
    792 			    !member(c, (uchar *)f->re[p[i]].lval) &&
    793 			    c != 0 && c != HAT) {
    794 				q = f->re[p[i]].lfollow;
    795 				for (j = 1; j <= *q; j++) {
    796 					if (setvec[q[j]] == 0) {
    797 						setcnt++;
    798 						setvec[q[j]] = 1;
    799 					}
    800 				}
    801 			}
    802 		}
    803 	}
    804 	/* determine if setvec is a previous state */
    805 	tmpset[0] = setcnt;
    806 	j = 1;
    807 	for (i = f->accept; i >= 0; i--)
    808 		if (setvec[i]) {
    809 			tmpset[j++] = i;
    810 		}
    811 	/* tmpset == previous state? */
    812 	for (i = 1; i <= f->curstat; i++) {
    813 		p = f->posns[i];
    814 		if ((k = tmpset[0]) != p[0])
    815 			goto different;
    816 		for (j = 1; j <= k; j++)
    817 			if (tmpset[j] != p[j])
    818 				goto different;
    819 		/* setvec is state i */
    820 		f->gototab[s][c] = i;
    821 		return (i);
    822 	different:;
    823 	}
    824 
    825 	/* add tmpset to current set of states */
    826 	if (f->curstat >= NSTATES-1) {
    827 		f->curstat = 2;
    828 		f->reset = 1;
    829 		for (i = 2; i < NSTATES; i++)
    830 			xfree(f->posns[i]);
    831 	} else
    832 		++(f->curstat);
    833 	for (i = 0; i < NCHARS; i++)
    834 		f->gototab[f->curstat][i] = 0;
    835 	xfree(f->posns[f->curstat]);
    836 	if ((p = (int *)calloc(1, (setcnt + 1) * sizeof (int))) == NULL)
    837 		overflo("out of space in cgoto");
    838 
    839 	f->posns[f->curstat] = p;
    840 	f->gototab[s][c] = f->curstat;
    841 	for (i = 0; i <= setcnt; i++)
    842 		p[i] = tmpset[i];
    843 	if (setvec[f->accept])
    844 		f->out[f->curstat] = 1;
    845 	else
    846 		f->out[f->curstat] = 0;
    847 	return (f->curstat);
    848 }
    849 
    850 static void
    851 freefa(fa *f)
    852 {
    853 
    854 	register int i;
    855 
    856 	if (f == NULL)
    857 		return;
    858 	for (i = 0; i <= f->curstat; i++)
    859 		xfree(f->posns[i]);
    860 	for (i = 0; i <= f->accept; i++)
    861 		xfree(f->re[i].lfollow);
    862 	xfree(f->restr);
    863 	xfree(f);
    864 }
    865