Home | History | Annotate | Download | only in awk_xpg4
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
     28  */
     29 
     30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     31 
     32 /*
     33  * awk -- mainline, yylex, etc.
     34  *
     35  * Based on MKS awk(1) ported to be /usr/xpg4/bin/awk with POSIX/XCU4 changes
     36  */
     37 
     38 #include "awk.h"
     39 #include "y.tab.h"
     40 #include <stdarg.h>
     41 #include <unistd.h>
     42 #include <locale.h>
     43 #include <search.h>
     44 
static char	*progfiles[NPFILE];	/* Programme file names given with -f */
static char	**progfilep = &progfiles[0]; /* One past the last stored file */
static wchar_t	*progptr;		/* In-memory programme (NULL if none) */
static int	proglen;		/* Characters still unread at progptr */
static wchar_t	context[NCONTEXT];	/* Circular buffer of recent input */
static wchar_t	*conptr = &context[0];	/* Next write position in context[] */
static FILE	*progfp;		/* Stdio stream for programme */
static char	*filename;		/* Programme source name for messages */
#ifdef	DEBUG
static int	dflag;			/* Set by -d: yylex prints each token */
#endif
     56 
     57 #define	AWK_EXEC_MAGIC	"<MKS AWKC>"
     58 #define	LEN_EXEC_MAGIC	10
     59 
     60 static char	unbal[] = "unbalanced E char";
     61 
     62 static void	awkarginit(int c, char **av);
     63 static int	lexid(wint_t c);
     64 static int	lexnumber(wint_t c);
     65 static int	lexstring(wint_t endc);
     66 static int	lexregexp(wint_t endc);
     67 
     68 static void	awkvarinit(void);
     69 static wint_t	lexgetc(void);
     70 static void	lexungetc(wint_t c);
     71 static size_t	lexescape(wint_t endc, int regx, int cmd_line_operand);
     72 static void	awkierr(int perr, char *fmt, va_list ap);
     73 static int	usage(void);
     74 void		strescape(wchar_t *str);
     75 static const char	*toprint(wint_t);
     76 char *_cmdname;
     77 static wchar_t *mbconvert(char *str);
     78 
     79 extern int	isclvar(wchar_t *arg);
     80 
     81 /*
     82  * mainline for awk
     83  */
     84 int
     85 main(int argc, char *argv[])
     86 {
     87 	wchar_t *ap;
     88 	char *cmd;
     89 
     90 	cmd = argv[0];
     91 	_cmdname = cmd;
     92 
     93 	linebuf = emalloc(NLINE * sizeof (wchar_t));
     94 
     95 	/*
     96 	 * At this point only messaging should be internationalized.
     97 	 * numbers are still scanned as in the Posix locale.
     98 	 */
     99 	(void) setlocale(LC_ALL, "");
    100 	(void) setlocale(LC_NUMERIC, "C");
    101 #if !defined(TEXT_DOMAIN)
    102 #define	TEXT_DOMAIN	"SYS_TEST"
    103 #endif
    104 	(void) textdomain(TEXT_DOMAIN);
    105 
    106 	awkvarinit();
    107 	/* running = 1; */
    108 	while (argc > 1 && *argv[1] == '-') {
    109 		void *save_ptr = NULL;
    110 		ap = mbstowcsdup(&argv[1][1]);
    111 		if (ap == NULL)
    112 			break;
    113 		if (*ap == '\0') {
    114 			free(ap);
    115 			break;
    116 		}
    117 		save_ptr = (void *) ap;
    118 		++argv;
    119 		--argc;
    120 		if (*ap == '-' && ap[1] == '\0')
    121 			break;
    122 		for (; *ap != '\0'; ++ap) {
    123 			switch (*ap) {
    124 #ifdef DEBUG
    125 			case 'd':
    126 				dflag = 1;
    127 				continue;
    128 
    129 #endif
    130 			case 'f':
    131 				if (argc < 2) {
    132 					(void) fprintf(stderr,
    133 				gettext("Missing script file\n"));
    134 					return (1);
    135 				}
    136 				*progfilep++ = argv[1];
    137 				--argc;
    138 				++argv;
    139 				continue;
    140 
    141 			case 'F':
    142 				if (ap[1] == '\0') {
    143 					if (argc < 2) {
    144 						(void) fprintf(stderr,
    145 				gettext("Missing field separator\n"));
    146 						return (1);
    147 					}
    148 					ap = mbstowcsdup(argv[1]);
    149 					--argc;
    150 					++argv;
    151 				} else
    152 					++ap;
    153 				strescape(ap);
    154 				strassign(varFS, linebuf, FALLOC,
    155 				    wcslen(linebuf));
    156 				break;
    157 
    158 			case 'v': {
    159 				wchar_t *vp;
    160 				wchar_t *arg;
    161 
    162 				if (argc < 2) {
    163 					(void) fprintf(stderr,
    164 		gettext("Missing variable assignment\n"));
    165 					return (1);
    166 				}
    167 				arg = mbconvert(argv[1]);
    168 				/*
    169 				 * Ensure the variable expression
    170 				 * is valid (correct form).
    171 				 */
    172 				if (((vp = wcschr(arg, '=')) != NULL) &&
    173 				    isclvar(arg)) {
    174 					*vp = '\0';
    175 					strescape(vp+1);
    176 					strassign(vlook(arg), linebuf,
    177 					    FALLOC|FSENSE,
    178 					    wcslen(linebuf));
    179 					*vp = '=';
    180 				} else {
    181 					(void) fprintf(stderr, gettext(
    182 					    "Invalid form for variable "
    183 					    "assignment: %S\n"), arg);
    184 					return (1);
    185 				}
    186 				--argc;
    187 				++argv;
    188 				continue;
    189 			}
    190 
    191 			default:
    192 				(void) fprintf(stderr,
    193 				gettext("Unknown option \"-%S\"\n"), ap);
    194 				return (usage());
    195 			}
    196 			break;
    197 		}
    198 		if (save_ptr)
    199 			free(save_ptr);
    200 	}
    201 	if (progfilep == &progfiles[0]) {
    202 		if (argc < 2)
    203 			return (usage());
    204 		filename = "[command line]";	/* BUG: NEEDS TRANSLATION */
    205 		progptr = mbstowcsdup(argv[1]);
    206 		proglen = wcslen(progptr);
    207 		--argc;
    208 		++argv;
    209 	}
    210 
    211 	argv[0] = cmd;
    212 
    213 	awkarginit(argc, argv);
    214 
    215 	/* running = 0; */
    216 	(void) yyparse();
    217 
    218 	lineno = 0;
    219 	/*
    220 	 * Ok, done parsing, so now activate the rest of the nls stuff, set
    221 	 * the radix character.
    222 	 */
    223 	(void) setlocale(LC_ALL, "");
    224 	radixpoint = *localeconv()->decimal_point;
    225 	awk();
    226 	/* NOTREACHED */
    227 	return (0);
    228 }
    229 
    230 /*
    231  * Do initial setup of buffers, etc.
    232  * This must be called before most processing
    233  * and especially before lexical analysis.
    234  * Variables initialised here will be overruled by command
    235  * line parameter initialisation.
    236  */
    237 static void
    238 awkvarinit()
    239 {
    240 	NODE *np;
    241 
    242 	(void) setvbuf(stderr, NULL, _IONBF, 0);
    243 
    244 	if ((NIOSTREAM = sysconf(_SC_OPEN_MAX) - 4) <= 0) {
    245 		(void) fprintf(stderr,
    246 	gettext("not enough available file descriptors"));
    247 		exit(1);
    248 	}
    249 	ofiles = (OFILE *)emalloc(sizeof (OFILE)*NIOSTREAM);
    250 #ifdef A_ZERO_POINTERS
    251 	(void) memset((wchar_t *)ofiles, 0, sizeof (OFILE) * NIOSTREAM);
    252 #else
    253 	{
    254 		/* initialize file descriptor table */
    255 		OFILE *fp;
    256 		for (fp = ofiles; fp < &ofiles[NIOSTREAM]; fp += 1) {
    257 			fp->f_fp = FNULL;
    258 					fp->f_mode = 0;
    259 					fp->f_name = (char *)0;
    260 		}
    261 	}
    262 #endif
    263 	constant = intnode((INT)0);
    264 
    265 	const0 = intnode((INT)0);
    266 	const1 = intnode((INT)1);
    267 	constundef = emptynode(CONSTANT, 0);
    268 	constundef->n_flags = FSTRING|FVINT;
    269 	constundef->n_string = _null;
    270 	constundef->n_strlen = 0;
    271 	inc_oper = emptynode(ADD, 0);
    272 	inc_oper->n_right = const1;
    273 	asn_oper = emptynode(ADD, 0);
    274 	field0 = node(FIELD, const0, NNULL);
    275 
    276 	{
    277 		RESFUNC near*rp;
    278 
    279 		for (rp = &resfuncs[0]; rp->rf_name != (LOCCHARP)NULL; ++rp) {
    280 			np = finstall(rp->rf_name, rp->rf_func, rp->rf_type);
    281 		}
    282 	}
    283 	{
    284 		RESERVED near*rp;
    285 
    286 		for (rp = &reserved[0]; rp->r_name != (LOCCHARP)NULL; ++rp) {
    287 			switch (rp->r_type) {
    288 			case SVAR:
    289 			case VAR:
    290 				running = 1;
    291 				np = vlook(rp->r_name);
    292 				if (rp->r_type == SVAR)
    293 					np->n_flags |= FSPECIAL;
    294 				if (rp->r_svalue != NULL)
    295 					strassign(np, rp->r_svalue, FSTATIC,
    296 					    (size_t)rp->r_ivalue);
    297 				else {
    298 					constant->n_int = rp->r_ivalue;
    299 					(void) assign(np, constant);
    300 				}
    301 				running = 0;
    302 				break;
    303 
    304 			case KEYWORD:
    305 				kinstall(rp->r_name, (int)rp->r_ivalue);
    306 				break;
    307 			}
    308 		}
    309 	}
    310 
    311 	varNR = vlook(s_NR);
    312 	varFNR = vlook(s_FNR);
    313 	varNF = vlook(s_NF);
    314 	varOFMT = vlook(s_OFMT);
    315 	varCONVFMT = vlook(s_CONVFMT);
    316 	varOFS = vlook(s_OFS);
    317 	varORS = vlook(s_ORS);
    318 	varRS = vlook(s_RS);
    319 	varFS = vlook(s_FS);
    320 	varARGC = vlook(s_ARGC);
    321 	varSUBSEP = vlook(s_SUBSEP);
    322 	varENVIRON = vlook(s_ENVIRON);
    323 	varFILENAME = vlook(s_FILENAME);
    324 	varSYMTAB = vlook(s_SYMTAB);
    325 	incNR = node(ASG, varNR, node(ADD, varNR, const1));
    326 	incFNR = node(ASG, varFNR, node(ADD, varFNR, const1));
    327 	clrFNR = node(ASG, varFNR, const0);
    328 }
    329 
    330 /*
    331  * Initialise awk ARGC, ARGV variables.
    332  */
    333 static void
    334 awkarginit(int ac, char **av)
    335 {
    336 	int i;
    337 	wchar_t *cp;
    338 
    339 	ARGVsubi = node(INDEX, vlook(s_ARGV), constant);
    340 	running = 1;
    341 	constant->n_int = ac;
    342 	(void) assign(varARGC, constant);
    343 	for (i = 0; i < ac; ++i) {
    344 		cp = mbstowcsdup(av[i]);
    345 		constant->n_int = i;
    346 		strassign(exprreduce(ARGVsubi), cp,
    347 		    FSTATIC|FSENSE, wcslen(cp));
    348 	}
    349 	running = 0;
    350 }
    351 
    352 /*
    353  * Clean up when done parsing a function.
    354  * All formal parameters, because of a deal (funparm) in
    355  * yylex, get put into the symbol table in front of any
    356  * global variable of the same name.  When the entire
    357  * function is parsed, remove these formal dummy nodes
    358  * from the symbol table but retain the nodes because
    359  * the generated tree points at them.
    360  */
    361 void
    362 uexit(NODE *np)
    363 {
    364 	NODE *formal;
    365 
    366 	while ((formal = getlist(&np)) != NNULL)
    367 		delsymtab(formal, 0);
    368 }
    369 
/*
 * The lexical analyzer.
 *
 * Returns the next token for the parser.  Besides splitting the input
 * into tokens it also:
 *   - returns a token held back in `savetoken' by the previous call;
 *   - reads a regular expression when `redelim' has been set;
 *   - turns a newline into ';' or skips it, depending on the previous
 *     token (`lexlast');
 *   - inserts a CONCAT token between two adjacent terms, using
 *     `catterm' to remember that the previous token ended a term;
 *   - maps single-character tokens to symbolic names through ctosym[].
 *
 * When DEBUG is defined, yylex() is a small wrapper that prints each
 * token value if dflag is set, and the real lexer is named yyhex().
 */
int
yylex()
#ifdef	DEBUG
{
	int l;

	l = yyhex();
	if (dflag)
		(void) printf("%d\n", l);
	return (l);
}
yyhex()
#endif
{
	wint_t c, c1;
	int i;
	static int savetoken = 0;	/* token to return on the next call */
	static int wasfield;		/* set to 2 after "$" CONSTANT/PARM */
	static int isfuncdef;		/* previous token was DEFFUNC */
	static int nbrace, nparen, nbracket;	/* current nesting depths */
	/* Single-character tokens and the symbolic token each maps to. */
	static struct ctosymstruct {
		wint_t c, sym;
	} ctosym[] = {
		{ '|', BAR },		{ '^', CARAT },
		{ '~', TILDE },		{ '<', LANGLE },
		{ '>', RANGLE },	{ '+', PLUSC },
		{ '-', HYPHEN },	{ '*', STAR },
		{ '/', SLASH },		{ '%', PERCENT },
		{ '!', EXCLAMATION },	{ '$', DOLLAR },
		{ '[', LSQUARE },	{ ']', RSQUARE },
		{ '(', LPAREN },	{ ')', RPAREN },
		{ ';', SEMI },		{ '{', LBRACE },
		{ '}', RBRACE },	{   0, 0 }
	};

	if (savetoken) {
		/* Deliver the token deferred by the previous call. */
		c = savetoken;
		savetoken = 0;
	} else if (redelim != '\0') {
		/*
		 * A regular expression was requested: read it up to the
		 * delimiter and queue the delimiter as the next token.
		 */
		c = redelim;
		redelim = 0;
		catterm = 0;
		savetoken = c;
		return (lexlast = lexregexp(c));
	} else while ((c = lexgetc()) != WEOF) {
		/* `continue' skips the character; `break' below ends the scan. */
		if (iswalpha(c) || c == '_') {
			c = lexid(c);
		} else if (iswdigit(c) || c == '.') {
			c = lexnumber(c);
		} else if (isWblank(c)) {
			continue;
		} else switch (c) {
#if DOS || OS2
		case 032:		/* ^Z */
			continue;
#endif

		case '"':
			c = lexstring(c);
			break;

		case '#':
			/* Comment: discard up to, but not including, newline. */
			while ((c = lexgetc()) != '\n' && c != WEOF)
				;
			lexungetc(c);
			continue;

		case '+':
			if ((c1 = lexgetc()) == '+')
				c = INC;
			else if (c1 == '=')
				c = AADD;
			else
				lexungetc(c1);
			break;

		case '-':
			if ((c1 = lexgetc()) == '-')
				c = DEC;
			else if (c1 == '=')
				c = ASUB;
			else
				lexungetc(c1);
			break;

		case '*':
			/* "*=", "**=", "**" or plain "*". */
			if ((c1 = lexgetc()) == '=')
				c = AMUL;
			else if (c1 == '*') {
				if ((c1 = lexgetc()) == '=')
					c = AEXP;
				else {
					c = EXP;
					lexungetc(c1);
				}
			} else
				lexungetc(c1);
			break;

		case '^':
			if ((c1 = lexgetc()) == '=') {
				c = AEXP;
			} else {
				c = EXP;
				lexungetc(c1);
			}
			break;

		case '/':
			/*
			 * "/=" is ADIV only when the previous token is none
			 * of RE, NRE, ';', newline, ',' or '(' -- presumably
			 * the places where '/' opens a regular expression
			 * that starts with '='; confirm against the grammar.
			 */
			if ((c1 = lexgetc()) == '=' &&
			    lexlast != RE && lexlast != NRE &&
			    lexlast != ';' && lexlast != '\n' &&
			    lexlast != ',' && lexlast != '(')
				c = ADIV;
			else
				lexungetc(c1);
			break;

		case '%':
			if ((c1 = lexgetc()) == '=')
				c = AREM;
			else
				lexungetc(c1);
			break;

		case '&':
			if ((c1 = lexgetc()) == '&')
				c = AND;
			else
				lexungetc(c1);
			break;

		case '|':
			/* A single '|' inside a print statement is PIPE. */
			if ((c1 = lexgetc()) == '|')
				c = OR;
			else {
				lexungetc(c1);
				if (inprint)
					c = PIPE;
			}
			break;

		case '>':
			/*
			 * A single '>' in a print statement and outside any
			 * parentheses is WRITE rather than a comparison.
			 */
			if ((c1 = lexgetc()) == '=')
				c = GE;
			else if (c1 == '>')
				c = APPEND;
			else {
				lexungetc(c1);
				if (nparen == 0 && inprint)
					c = WRITE;
			}
			break;

		case '<':
			if ((c1 = lexgetc()) == '=')
				c = LE;
			else
				lexungetc(c1);
			break;

		case '!':
			if ((c1 = lexgetc()) == '=')
				c = NE;
			else if (c1 == '~')
				c = NRE;
			else
				lexungetc(c1);
			break;

		case '=':
			if ((c1 = lexgetc()) == '=')
				c = EQ;
			else {
				lexungetc(c1);
				c = ASG;
			}
			break;

		case '\n':
			/*
			 * A newline either ends the statement (becomes ';')
			 * or is ignored, depending on the previous token.
			 */
			switch (lexlast) {
			case ')':
				if (catterm || inprint) {
					c = ';';
					break;
				}
			/*FALLTHRU*/
			case AND:
			case OR:
			case COMMA:
			case '{':
			case ELSE:
			case ';':
			case DO:
				continue;

			case '}':
				if (nbrace != 0)
					continue;
			/* nbrace == 0: falls through to the default case */

			default:
				c = ';';
				break;
			}
			break;

		/*
		 * NOTE(review): c is a raw input character in this switch,
		 * so this label matches only a character whose code equals
		 * the ELSE token value -- confirm whether this is intended.
		 */
		case ELSE:
			if (lexlast != ';') {
				savetoken = ELSE;
				c = ';';
			}
			break;

		case '(':
			++nparen;
			break;

		case ')':
			if (--nparen < 0)
				awkerr(unbal, "()");
			break;

		case '{':
			nbrace++;
			break;

		case '}':
			if (--nbrace < 0) {
				char brk[3];

				brk[0] = '{';
				brk[1] = '}';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			/* Supply a ';' before the '}' if there was none. */
			if (lexlast != ';') {
				savetoken = c;
				c = ';';
			}
			break;

		case '[':
			++nbracket;
			break;

		case ']':
			if (--nbracket < 0) {
				char brk[3];

				brk[0] = '[';
				brk[1] = ']';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			break;

		case '\\':
			/* Backslash-newline is a line continuation. */
			if ((c1 = lexgetc()) == '\n')
				continue;
			lexungetc(c1);
			break;

		case ',':
			c = COMMA;
			break;

		case '?':
			c = QUEST;
			break;

		case ':':
			c = COLON;
			break;

		default:
			if (!iswprint(c))
				awkerr(
				    gettext("invalid character \"%s\""),
				    toprint(c));
			break;
		}
		break;
	}

	/*
	 * Second pass over the token: maintain catterm, inprint and
	 * isfuncdef, and replace the token by CONCAT (deferring the
	 * token itself to the next call) where two terms are adjacent.
	 */
	switch (c) {
	case ']':
		++catterm;
		break;

	case VAR:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else if (!isfuncdef) {
			/*
			 * Peek at the next character: unless it is '(' the
			 * variable counts as a term that may be concatenated.
			 */
			if ((c1 = lexgetc()) != '(')
				++catterm;
			lexungetc(c1);
		}
		isfuncdef = 0;
		break;

	case PARM:
	case CONSTANT:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else {
			/* Remember for the next call that this followed '$'. */
			if (lexlast == '$')
				wasfield = 2;
			++catterm;
		}
		break;

	case INC:
	case DEC:
		/*
		 * Fall into the CONCAT insertion below only when the
		 * operator follows a CONSTANT term that was not a '$'
		 * field index; otherwise leave catterm untouched.
		 */
		if (!catterm || lexlast != CONSTANT || wasfield)
			break;

	/*FALLTHRU*/
	case UFUNC:
	case FUNC:
	case GETLINE:
	case '!':
	case '$':
	case '(':
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		}
		break;

	/* { */ case '}':
		/* A '}' closing the outermost block is followed by ';'. */
		if (nbrace == 0)
			savetoken = ';';
	/*FALLTHRU*/
	case ';':
		inprint = 0;
	/*FALLTHRU*/
	default:
		if (c == DEFFUNC)
			isfuncdef = 1;
		catterm = 0;
	}
	/* lexlast keeps the token as it was before the ctosym[] mapping. */
	lexlast = c;
	if (wasfield)
		wasfield--;
	/*
	 * Map character constants to symbolic names.
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
	return ((int)c);
}
    732 
    733 /*
    734  * Read a number for the lexical analyzer.
    735  * Input is the first character of the number.
    736  * Return value is the lexical type.
    737  */
    738 static int
    739 lexnumber(wint_t c)
    740 {
    741 	wchar_t *cp;
    742 	int dotfound = 0;
    743 	int efound = 0;
    744 	INT number;
    745 
    746 	cp = linebuf;
    747 	do {
    748 		if (iswdigit(c))
    749 			;
    750 		else if (c == '.') {
    751 			if (dotfound++)
    752 				break;
    753 		} else if (c == 'e' || c == 'E') {
    754 			if ((c = lexgetc()) != '-' && c != '+') {
    755 				lexungetc(c);
    756 				c = 'e';
    757 			} else
    758 				*cp++ = 'e';
    759 			if (efound++)
    760 				break;
    761 		} else
    762 			break;
    763 		*cp++ = c;
    764 	} while ((c = lexgetc()) != WEOF);
    765 	*cp = '\0';
    766 	if (dotfound && cp == linebuf+1)
    767 		return (DOT);
    768 	lexungetc(c);
    769 	errno = 0;
    770 	if (!dotfound && !efound &&
    771 	    ((number = wcstol(linebuf, (wchar_t **)0, 10)), errno != ERANGE))
    772 		yylval.node = intnode(number);
    773 	else
    774 		yylval.node = realnode((REAL)wcstod(linebuf, (wchar_t **)0));
    775 	return (CONSTANT);
    776 }
    777 
/*
 * Read an identifier.
 * Input is first character of identifier.
 * The name is collected in linebuf and looked up with vlook(); the
 * resulting node is stored in yylval.node.
 * Returns the keyword's own token for a KEYWORD, VAR for a variable,
 * array or parameter, and the node type for UFUNC, FUNC and GETLINE.
 */
static int
lexid(wint_t c)
{
	wchar_t *cp;
	size_t i;
	NODE *np;

	/* Collect letters, digits and underscores. */
	cp = linebuf;
	do {
		*cp++ = c;
		c = lexgetc();
	} while (iswalpha(c) || iswdigit(c) || c == '_');
	*cp = '\0';
	lexungetc(c);
	yylval.node = np = vlook(linebuf);

	switch (np->n_type) {
	case KEYWORD:
		switch (np->n_keywtype) {
		case PRINT:
		case PRINTF:
			/* yylex treats '>' and '|' specially while inprint is set */
			++inprint;
		/* falls through to return the keyword token */
		default:
			return ((int)np->n_keywtype);
		}
		/* NOTREACHED */

	case ARRAY:
	case VAR:
		/*
		 * If reading the argument list, create a dummy node
		 * for the duration of that function. These variables
		 * can be removed from the symbol table at function end
		 * but they must still exist because the execution tree
		 * knows about them.
		 */
		if (funparm) {
do_funparm:
			/* Also entered by goto from the UFUNC case below. */
			np = emptynode(PARM, i = (cp-linebuf));
			np->n_flags = FSTRING;
			np->n_string = _null;
			np->n_strlen = 0;
			/* Copy the name including its terminating null. */
			(void) memcpy(np->n_name, linebuf,
			    (i+1) * sizeof (wchar_t));
			addsymtab(np);
			yylval.node = np;
		} else if (np == varNF || (np == varFS &&
		    (!doing_begin || begin_getline))) {
			/*
			 * If the user program references NF or sets
			 * FS either outside of a begin block or
			 * in a begin block after a getline then the
			 * input line will be split immediately upon read
			 * rather than when a field is first referenced.
			 */
			needsplit = 1;
		} else if (np == varENVIRON)
			needenviron = 1;
	/*FALLTHRU*/
	case PARM:
		return (VAR);

	case UFUNC:
		/*
		 * It is ok to redefine functions as parameters
		 */
		if (funparm) goto do_funparm;
	/*FALLTHRU*/
	case FUNC:
	case GETLINE:
		/*
		 * While inside a begin block (doing_begin), note in
		 * 'begin_getline' that one of these tokens was seen.
		 * This will force the 'needsplit' flag to be set, even inside
		 * a begin block, if FS is altered. (See VAR case above)
		 * Because of the fall-throughs this applies to UFUNC and
		 * FUNC as well as GETLINE.
		 */
		if (doing_begin)
			begin_getline = 1;
		return (np->n_type);
	}
	/* NOTREACHED */
	return (0);
}
    865 
    866 /*
    867  * Read a string for the lexical analyzer.
    868  * `endc' terminates the string.
    869  */
    870 static int
    871 lexstring(wint_t endc)
    872 {
    873 	size_t length = lexescape(endc, 0, 0);
    874 
    875 	yylval.node = stringnode(linebuf, FALLOC, length);
    876 	return (CONSTANT);
    877 }
    878 
    879 /*
    880  * Read a regular expression.
    881  */
    882 static int
    883 lexregexp(wint_t endc)
    884 {
    885 	(void) lexescape(endc, 1, 0);
    886 	yylval.node = renode(linebuf);
    887 	return (URE);
    888 }
    889 
    890 /*
    891  * Process a string, converting the escape characters as required by
    892  * 1003.2. The processed string ends up in the global linebuf[]. This
    893  * routine also changes the value of 'progfd' - the program file
    894  * descriptor, so it should be used with some care. It is presently used to
    895  * process -v (awk1.c) and var=str type arguments (awk2.c, nextrecord()).
    896  */
    897 void
    898 strescape(wchar_t *str)
    899 {
    900 	progptr = str;
    901 	proglen = wcslen(str) + 1;	/* Include \0 */
    902 	(void) lexescape('\0', 0, 1);
    903 	progptr = NULL;
    904 }
    905 
    906 /*
    907  * Read a string or regular expression, terminated by ``endc'',
    908  * for lexical analyzer, processing escape sequences.
    909  * Return string length.
    910  */
    911 static size_t
    912 lexescape(wint_t endc, int regx, int cmd_line_operand)
    913 {
    914 	static char nlre[256];
    915 	static char nlstr[256];
    916 	static char eofre[256];
    917 	static char eofstr[256];
    918 	int first_time = 1;
    919 	wint_t c;
    920 	wchar_t *cp;
    921 	int n, max;
    922 
    923 	if (first_time == 1) {
    924 		(void) strcpy(nlre, gettext("Newline in regular expression\n"));
    925 		(void) strcpy(nlstr, gettext("Newline in string\n"));
    926 		(void) strcpy(eofre, gettext("EOF in regular expression\n"));
    927 		(void) strcpy(eofstr, gettext("EOF in string\n"));
    928 		first_time = 0;
    929 	}
    930 
    931 	cp = linebuf;
    932 	while ((c = lexgetc()) != endc) {
    933 		if (c == '\n')
    934 			awkerr(regx ? nlre : nlstr);
    935 		if (c == '\\') {
    936 			switch (c = lexgetc(), c) {
    937 			case '\\':
    938 				if (regx)
    939 					*cp++ = '\\';
    940 				break;
    941 
    942 			case '/':
    943 				c = '/';
    944 				break;
    945 
    946 			case 'n':
    947 				c = '\n';
    948 				break;
    949 
    950 			case 'b':
    951 				c = '\b';
    952 				break;
    953 
    954 			case 't':
    955 				c = '\t';
    956 				break;
    957 
    958 			case 'r':
    959 				c = '\r';
    960 				break;
    961 
    962 			case 'f':
    963 				c = '\f';
    964 				break;
    965 
    966 			case 'v':
    967 				c = '\v';
    968 				break;
    969 
    970 			case 'a':
    971 				c = (char)0x07;
    972 				break;
    973 
    974 			case 'x':
    975 				n = 0;
    976 				while (iswxdigit(c = lexgetc())) {
    977 					if (iswdigit(c))
    978 						c -= '0';
    979 					else if (iswupper(c))
    980 						c -= 'A'-10;
    981 					else
    982 						c -= 'a'-10;
    983 					n = (n<<4) + c;
    984 				}
    985 				lexungetc(c);
    986 				c = n;
    987 				break;
    988 
    989 			case '0':
    990 			case '1':
    991 			case '2':
    992 			case '3':
    993 			case '4':
    994 			case '5':
    995 			case '6':
    996 			case '7':
    997 #if 0
    998 /*
    999  * Posix.2 draft 10 disallows the use of back-referencing - it explicitly
   1000  * requires processing of the octal escapes both in strings and
   1001  * regular expressions. The following code is disabled instead of
   1002  * removed as back-referencing may be reintroduced in a future draft
   1003  * of the standard.
   1004  */
   1005 				/*
   1006 				 * For regular expressions, we disallow
   1007 				 * \ooo to mean octal character, in favour
   1008 				 * of back referencing.
   1009 				 */
   1010 				if (regx) {
   1011 					*cp++ = '\\';
   1012 					break;
   1013 				}
   1014 #endif
   1015 				max = 3;
   1016 				n = 0;
   1017 				do {
   1018 					n = (n<<3) + c-'0';
   1019 					if ((c = lexgetc()) > '7' || c < '0')
   1020 						break;
   1021 				} while (--max);
   1022 				lexungetc(c);
   1023 				/*
   1024 				 * an octal escape sequence must have at least
   1025 				 * 2 digits after the backslash, otherwise
   1026 				 * it gets passed straight thru for possible
   1027 				 * use in backreferencing.
   1028 				 */
   1029 				if (max == 3) {
   1030 					*cp++ = '\\';
   1031 					n += '0';
   1032 				}
   1033 				c = n;
   1034 				break;
   1035 
   1036 			case '\n':
   1037 				continue;
   1038 
   1039 			default:
   1040 				if (c != endc || cmd_line_operand) {
   1041 					*cp++ = '\\';
   1042 					if (c == endc)
   1043 						lexungetc(c);
   1044 				}
   1045 			}
   1046 		}
   1047 		if (c == WEOF)
   1048 			awkerr(regx ? eofre : eofstr);
   1049 		*cp++ = c;
   1050 	}
   1051 	*cp = '\0';
   1052 	return (cp - linebuf);
   1053 }
   1054 
   1055 /*
   1056  * Build a regular expression NODE.
   1057  * Argument is the string holding the expression.
   1058  */
   1059 NODE *
   1060 renode(wchar_t *s)
   1061 {
   1062 	NODE *np;
   1063 	int n;
   1064 
   1065 	np = emptynode(RE, 0);
   1066 	np->n_left = np->n_right = NNULL;
   1067 	if ((n = REGWCOMP(&np->n_regexp, s)) != REG_OK) {
   1068 		int m;
   1069 		char *p;
   1070 
   1071 		m = REGWERROR(n, np->n_regexp, NULL, 0);
   1072 		p = (char *)emalloc(m);
   1073 		REGWERROR(n, np->n_regexp, p, m);
   1074 		awkerr("/%S/: %s", s, p);
   1075 	}
   1076 	return (np);
   1077 }
   1078 /*
   1079  * Get a character for the lexical analyser routine.
   1080  */
   1081 static wint_t
   1082 lexgetc()
   1083 {
   1084 	wint_t c;
   1085 	static char **files = &progfiles[0];
   1086 
   1087 	if (progfp != FNULL && (c = fgetwc(progfp)) != WEOF)
   1088 		;
   1089 	else {
   1090 		if (progptr != NULL) {
   1091 			if (proglen-- <= 0)
   1092 				c = WEOF;
   1093 			else
   1094 				c = *progptr++;
   1095 		} else {
   1096 			if (progfp != FNULL)
   1097 				if (progfp != stdin)
   1098 					(void) fclose(progfp);
   1099 				else
   1100 					clearerr(progfp);
   1101 				progfp = FNULL;
   1102 			if (files < progfilep) {
   1103 				filename = *files++;
   1104 				lineno = 1;
   1105 				if (filename[0] == '-' && filename[1] == '\0')
   1106 					progfp = stdin;
   1107 				else if ((progfp = fopen(filename, r))
   1108 				    == FNULL) {
   1109 					(void) fprintf(stderr,
   1110 				gettext("script file \"%s\""), filename);
   1111 					exit(1);
   1112 				}
   1113 				c = fgetwc(progfp);
   1114 			}
   1115 		}
   1116 	}
   1117 	if (c == '\n')
   1118 		++lineno;
   1119 	if (conptr >= &context[NCONTEXT])
   1120 		conptr = &context[0];
   1121 	if (c != WEOF)
   1122 		*conptr++ = c;
   1123 	return (c);
   1124 }
   1125 
   1126 /*
   1127  * Return a character for lexical analyser.
   1128  * Only one returned character is (not enforced) legitimite.
   1129  */
   1130 static void
   1131 lexungetc(wint_t c)
   1132 {
   1133 	if (c == '\n')
   1134 		--lineno;
   1135 	if (c != WEOF) {
   1136 		if (conptr == &context[0])
   1137 			conptr = &context[NCONTEXT];
   1138 		*--conptr = '\0';
   1139 	}
   1140 	if (progfp != FNULL) {
   1141 		(void) ungetwc(c, progfp);
   1142 		return;
   1143 	}
   1144 	if (c == WEOF)
   1145 		return;
   1146 	*--progptr = c;
   1147 	proglen++;
   1148 }
   1149 
   1150 /*
   1151  * Syntax errors during parsing.
   1152  */
   1153 void
   1154 yyerror(char *s, ...)
   1155 {
   1156 	if (lexlast == FUNC || lexlast == GETLINE || lexlast == KEYWORD)
   1157 		if (lexlast == KEYWORD)
   1158 			awkerr(gettext("inadmissible use of reserved keyword"));
   1159 		else
   1160 			awkerr(gettext("attempt to redefine builtin function"));
   1161 	awkerr(s);
   1162 }
   1163 
   1164 /*
   1165  * Error routine for all awk errors.
   1166  */
   1167 /* ARGSUSED */
   1168 void
   1169 awkerr(char *fmt, ...)
   1170 {
   1171 	va_list args;
   1172 
   1173 	va_start(args, fmt);
   1174 	awkierr(0, fmt, args);
   1175 	va_end(args);
   1176 }
   1177 
   1178 /*
   1179  * Error routine like "awkerr" except that it prints out
   1180  * a message that includes an errno-specific indication.
   1181  */
   1182 /* ARGSUSED */
   1183 void
   1184 awkperr(char *fmt, ...)
   1185 {
   1186 	va_list args;
   1187 
   1188 	va_start(args, fmt);
   1189 	awkierr(1, fmt, args);
   1190 	va_end(args);
   1191 }
   1192 
   1193 /*
   1194  * Common internal routine for awkerr, awkperr
   1195  */
   1196 static void
   1197 awkierr(int perr, char *fmt, va_list ap)
   1198 {
   1199 	static char sep1[] = "\n>>>\t";
   1200 	static char sep2[] = "\t<<<";
   1201 	int saveerr = errno;
   1202 
   1203 	(void) fprintf(stderr, "%s: ", _cmdname);
   1204 	if (running) {
   1205 		(void) fprintf(stderr, gettext("line %u ("),
   1206 		    curnode == NNULL ? 0 : curnode->n_lineno);
   1207 		if (phase == 0)
   1208 			(void) fprintf(stderr, "NR=%lld): ",
   1209 			    (INT)exprint(varNR));
   1210 		else
   1211 			(void) fprintf(stderr, "%s): ",
   1212 			    phase == BEGIN ? s_BEGIN : s_END);
   1213 	} else if (lineno != 0) {
   1214 		(void) fprintf(stderr, gettext("file \"%s\": "), filename);
   1215 		(void) fprintf(stderr, gettext("line %u: "), lineno);
   1216 	}
   1217 	(void) vfprintf(stderr, gettext(fmt), ap);
   1218 	if (perr == 1)
   1219 		(void) fprintf(stderr, ": %s", strerror(saveerr));
   1220 	if (perr != 2 && !running) {
   1221 		wchar_t *cp;
   1222 		int n;
   1223 		int c;
   1224 
   1225 		(void) fprintf(stderr, gettext("  Context is:%s"), sep1);
   1226 		cp = conptr;
   1227 		n = NCONTEXT;
   1228 		do {
   1229 			if (cp >= &context[NCONTEXT])
   1230 				cp = &context[0];
   1231 			if ((c = *cp++) != '\0')
   1232 				(void) fputs(c == '\n' ? sep1 : toprint(c),
   1233 				    stderr);
   1234 		} while (--n != 0);
   1235 		(void) fputs(sep2, stderr);
   1236 	}
   1237 	(void) fprintf(stderr, "\n");
   1238 	exit(1);
   1239 }
   1240 
   1241 wchar_t *
   1242 emalloc(unsigned n)
   1243 {
   1244 	wchar_t *cp;
   1245 
   1246 	if ((cp = malloc(n)) == NULL)
   1247 		awkerr(nomem);
   1248 	return (cp);
   1249 }
   1250 
   1251 wchar_t *
   1252 erealloc(wchar_t *p, unsigned n)
   1253 {
   1254 	wchar_t *cp;
   1255 
   1256 	if ((cp = realloc(p, n)) == NULL)
   1257 		awkerr(nomem);
   1258 	return (cp);
   1259 }
   1260 
   1261 
   1262 /*
   1263  * usage message for awk
   1264  */
   1265 static int
   1266 usage()
   1267 {
   1268 	(void) fprintf(stderr, gettext(
   1269 "Usage:	awk [-F ERE] [-v var=val] 'program' [var=val ...] [file ...]\n"
   1270 "	awk [-F ERE] -f progfile ... [-v var=val] [var=val ...] [file ...]\n"));
   1271 	return (2);
   1272 }
   1273 
   1274 
   1275 static wchar_t *
   1276 mbconvert(char *str)
   1277 {
   1278 	static wchar_t *op = 0;
   1279 
   1280 	if (op != 0)
   1281 		free(op);
   1282 	return (op = mbstowcsdup(str));
   1283 }
   1284 
   1285 char *
   1286 mbunconvert(wchar_t *str)
   1287 {
   1288 	static char *op = 0;
   1289 
   1290 	if (op != 0)
   1291 		free(op);
   1292 	return (op = wcstombsdup(str));
   1293 }
   1294 
   1295 /*
   1296  * Solaris port - following functions are typical MKS functions written
   1297  * to work for Solaris.
   1298  */
   1299 
   1300 wchar_t *
   1301 mbstowcsdup(s)
   1302 char *s;
   1303 {
   1304 	int n;
   1305 	wchar_t *w;
   1306 
   1307 	n = strlen(s) + 1;
   1308 	if ((w = (wchar_t *)malloc(n * sizeof (wchar_t))) == NULL)
   1309 		return (NULL);
   1310 
   1311 	if (mbstowcs(w, s, n) == (size_t)-1)
   1312 		return (NULL);
   1313 	return (w);
   1314 
   1315 }
   1316 
   1317 char *
   1318 wcstombsdup(wchar_t *w)
   1319 {
   1320 	int n;
   1321 	char *mb;
   1322 
   1323 	/* Fetch memory for worst case string length */
   1324 	n = wslen(w) + 1;
   1325 	n *= MB_CUR_MAX;
   1326 	if ((mb = (char *)malloc(n)) == NULL) {
   1327 		return (NULL);
   1328 	}
   1329 
   1330 	/* Convert the string */
   1331 	if ((n = wcstombs(mb, w, n)) == -1) {
   1332 		int saverr = errno;
   1333 
   1334 		free(mb);
   1335 		errno = saverr;
   1336 		return (0);
   1337 	}
   1338 
   1339 	/* Shrink the string down */
   1340 	if ((mb = (char *)realloc(mb, strlen(mb)+1)) == NULL)  {
   1341 		return (NULL);
   1342 	}
   1343 	return (mb);
   1344 }
   1345 
   1346 /*
   1347  * The upe_ctrls[] table contains the printable 'control-sequences' for the
   1348  * character values 0..31 and 127.  The first entry is for value 127, thus the
   1349  * entries for the remaining character values are from 1..32.
   1350  */
   1351 static const char *const upe_ctrls[] =
   1352 {
   1353 	"^?",
   1354 	"^@",  "^A",  "^B",  "^C",  "^D",  "^E",  "^F",  "^G",
   1355 	"^H",  "^I",  "^J",  "^K",  "^L",  "^M",  "^N",  "^O",
   1356 	"^P",  "^Q",  "^R",  "^S",  "^T",  "^U",  "^V",  "^W",
   1357 	"^X",  "^Y",  "^Z",  "^[",  "^\\", "^]",  "^^",  "^_"
   1358 };
   1359 
   1360 
   1361 /*
   1362  * Return a printable string corresponding to the given character value.  If
   1363  * the character is printable, simply return it as the string.  If it is in
   1364  * the range specified by table 5-101 in the UPE, return the corresponding
   1365  * string.  Otherwise, return an octal escape sequence.
   1366  */
   1367 static const char *
   1368 toprint(c)
   1369 wchar_t c;
   1370 {
   1371 	int n, len;
   1372 	unsigned char *ptr;
   1373 	static char mbch[MB_LEN_MAX+1];
   1374 	static char buf[5 * MB_LEN_MAX + 1];
   1375 
   1376 	if ((n = wctomb(mbch, c)) == -1) {
   1377 		/* Should never happen */
   1378 		(void) sprintf(buf, "\\%x", c);
   1379 		return (buf);
   1380 	}
   1381 	mbch[n] = '\0';
   1382 	if (iswprint(c)) {
   1383 		return (mbch);
   1384 	} else if (c == 127) {
   1385 		return (upe_ctrls[0]);
   1386 	} else if (c < 32) {
   1387 		/* Print as in Table 5-101 in the UPE */
   1388 		return (upe_ctrls[c+1]);
   1389 	} else {
   1390 		/* Print as an octal escape sequence */
   1391 		for (len = 0, ptr = (unsigned char *) mbch; 0 < n; --n, ++ptr)
   1392 			len += sprintf(buf+len, "\\%03o", *ptr);
   1393 	}
   1394 	return (buf);
   1395 }
   1396 
   1397 static int
   1398 wcoff(const wchar_t *astring, const int off)
   1399 {
   1400 	const wchar_t *s = astring;
   1401 	int c = 0;
   1402 	char mb[MB_LEN_MAX];
   1403 
   1404 	while (c < off) {
   1405 		int n;
   1406 		if ((n = wctomb(mb, *s)) == 0)
   1407 			break;
   1408 		if (n == -1)
   1409 			n = 1;
   1410 		c += n;
   1411 		s++;
   1412 	}
   1413 
   1414 	return (s - astring);
   1415 }
   1416 
   1417 #define	NREGHASH	64
   1418 #define	NREGHOLD	1024	/* max number unused entries */
   1419 
   1420 static int	nregunref;
   1421 
   1422 struct reghashq {
   1423 	struct qelem hq;
   1424 	struct regcache *regcachep;
   1425 };
   1426 
   1427 struct regcache {
   1428 	struct qelem	lq;
   1429 	wchar_t	*pattern;
   1430 	regex_t	re;
   1431 	int	refcnt;
   1432 	struct reghashq	hash;
   1433 };
   1434 
   1435 static struct qelem reghash[NREGHASH], reglink;
   1436 
   1437 /*
   1438  * Generate a hash value of the given wchar string.
   1439  * The hashing method is similar to what Java does for strings.
   1440  */
   1441 static uint_t
   1442 regtxthash(const wchar_t *str)
   1443 {
   1444 	int k = 0;
   1445 
   1446 	while (*str != L'\0')
   1447 		k = (31 * k) + *str++;
   1448 
   1449 	k += ~(k << 9);
   1450 	k ^=  (k >> 14);
   1451 	k +=  (k << 4);
   1452 	k ^=  (k >> 10);
   1453 
   1454 	return (k % NREGHASH);
   1455 }
   1456 
   1457 int
   1458 int_regwcomp(REGEXP *r, const wchar_t *pattern)
   1459 {
   1460 	regex_t re;
   1461 	char *mbpattern;
   1462 	int ret;
   1463 	uint_t key;
   1464 	struct qelem *qp;
   1465 	struct regcache *rcp;
   1466 
   1467 	key = regtxthash(pattern);
   1468 	for (qp = reghash[key].q_forw; qp != NULL; qp = qp->q_forw) {
   1469 		rcp = ((struct reghashq *)qp)->regcachep;
   1470 		if (*rcp->pattern == *pattern &&
   1471 		    wcscmp(rcp->pattern, pattern) == 0)
   1472 			break;
   1473 	}
   1474 	if (qp != NULL) {
   1475 		/* update link. put this one at the beginning */
   1476 		if (rcp != (struct regcache *)reglink.q_forw) {
   1477 			remque(&rcp->lq);
   1478 			insque(&rcp->lq, &reglink);
   1479 		}
   1480 		if (rcp->refcnt == 0)
   1481 			nregunref--;	/* no longer unref'ed */
   1482 		rcp->refcnt++;
   1483 		*(struct regcache **)r = rcp;
   1484 		return (REG_OK);
   1485 	}
   1486 
   1487 	if ((mbpattern = wcstombsdup((wchar_t *)pattern)) == NULL)
   1488 		return (REG_ESPACE);
   1489 
   1490 	ret = regcomp(&re, mbpattern, REG_EXTENDED);
   1491 
   1492 	free(mbpattern);
   1493 
   1494 	if (ret != REG_OK)
   1495 		return (ret);
   1496 
   1497 	if ((rcp = malloc(sizeof (struct regcache))) == NULL)
   1498 		return (REG_ESPACE);
   1499 	rcp->re = re;
   1500 	if ((rcp->pattern = wsdup(pattern)) == NULL) {
   1501 		regfree(&re);
   1502 		free(rcp);
   1503 		return (REG_ESPACE);
   1504 	}
   1505 	rcp->refcnt = 1;
   1506 	insque(&rcp->lq, &reglink);
   1507 	insque(&rcp->hash.hq, &reghash[key]);
   1508 	rcp->hash.regcachep = rcp;
   1509 
   1510 	*(struct regcache **)r = rcp;
   1511 	return (ret);
   1512 }
   1513 
   1514 void
   1515 int_regwfree(REGEXP r)
   1516 {
   1517 	int	cnt;
   1518 	struct qelem *qp, *nqp;
   1519 	struct regcache *rcp;
   1520 
   1521 	rcp = (struct regcache *)r;
   1522 
   1523 	if (--rcp->refcnt != 0)
   1524 		return;
   1525 
   1526 	/* this cache has no reference */
   1527 	if (++nregunref < NREGHOLD)
   1528 		return;
   1529 
   1530 	/*
   1531 	 * We've got too much unref'ed regex. Free half of least
   1532 	 * used regex.
   1533 	 */
   1534 	cnt = 0;
   1535 	for (qp = reglink.q_forw; qp != NULL; qp = nqp) {
   1536 		nqp = qp->q_forw;
   1537 		rcp = (struct regcache *)qp;
   1538 		if (rcp->refcnt != 0)
   1539 			continue;
   1540 
   1541 		/* free half of them */
   1542 		if (++cnt < (NREGHOLD / 2))
   1543 			continue;
   1544 
   1545 		/* detach and free */
   1546 		remque(&rcp->lq);
   1547 		remque(&rcp->hash.hq);
   1548 
   1549 		/* free up */
   1550 		free(rcp->pattern);
   1551 		regfree(&rcp->re);
   1552 		free(rcp);
   1553 
   1554 		nregunref--;
   1555 	}
   1556 }
   1557 
   1558 size_t
   1559 int_regwerror(int errcode, REGEXP r, char *errbuf, size_t bufsiz)
   1560 {
   1561 	struct regcache *rcp;
   1562 
   1563 	rcp = (struct regcache *)r;
   1564 	return (regerror(errcode, &rcp->re, errbuf, bufsiz));
   1565 }
   1566 
   1567 int
   1568 int_regwexec(REGEXP r,		/* compiled RE */
   1569 	const wchar_t *astring,	/* subject string */
   1570 	size_t nsub,		/* number of subexpressions */
   1571 	int_regwmatch_t *sub,	/* subexpression pointers */
   1572 	int flags)
   1573 {
   1574 	char *mbs;
   1575 	regmatch_t *mbsub = NULL;
   1576 	int i;
   1577 	struct regcache *rcp;
   1578 
   1579 	if ((mbs = wcstombsdup((wchar_t *)astring)) == NULL)
   1580 		return (REG_ESPACE);
   1581 
   1582 	if (nsub > 0 && sub) {
   1583 		if ((mbsub = malloc(nsub * sizeof (regmatch_t))) == NULL)
   1584 			return (REG_ESPACE);
   1585 	}
   1586 
   1587 	rcp = (struct regcache *)r;
   1588 
   1589 	i = regexec(&rcp->re, mbs, nsub, mbsub, flags);
   1590 
   1591 	/* Now, adjust the pointers/counts in sub */
   1592 	if (i == REG_OK && nsub > 0 && mbsub) {
   1593 		int j, k;
   1594 
   1595 		for (j = 0; j < nsub; j++) {
   1596 			regmatch_t *ms = &mbsub[j];
   1597 			int_regwmatch_t *ws = &sub[j];
   1598 
   1599 			if ((k = ms->rm_so) >= 0) {
   1600 				ws->rm_so = wcoff(astring, k);
   1601 				ws->rm_sp = astring + ws->rm_so;
   1602 			}
   1603 			if ((k = ms->rm_eo) >= 0) {
   1604 				ws->rm_eo = wcoff(astring, k);
   1605 				ws->rm_ep = astring + ws->rm_eo;
   1606 			}
   1607 		}
   1608 	}
   1609 
   1610 	free(mbs);
   1611 	if (mbsub)
   1612 		free(mbsub);
   1613 	return (i);
   1614 }
   1615 
   1616 int
   1617 int_regwdosuba(REGEXP rp,		/* compiled RE: Pattern */
   1618 	const wchar_t *rpl,		/* replacement string: /rpl/ */
   1619 	const wchar_t *src,		/* source string */
   1620 	wchar_t **dstp,			/* destination string */
   1621 	int len,			/* destination length */
   1622 	int *globp)	/* IN: occurence, 0 for all; OUT: substitutions */
   1623 {
   1624 	wchar_t *dst, *odst;
   1625 	const wchar_t *ip, *xp;
   1626 	wchar_t *op;
   1627 	int i;
   1628 	wchar_t c;
   1629 	int glob, iglob = *globp, oglob = 0;
   1630 #define	NSUB	10
   1631 	int_regwmatch_t rm[NSUB], *rmp;
   1632 	int flags;
   1633 	wchar_t *end;
   1634 	int regerr;
   1635 
   1636 /* handle overflow of dst. we need "i" more bytes */
   1637 #ifdef OVERFLOW
   1638 #undef OVERFLOW
   1639 #define	OVERFLOW(i) { \
   1640 		int pos = op - dst; \
   1641 		dst = (wchar_t *)realloc(odst = dst, \
   1642 			(len += len + i) * sizeof (wchar_t)); \
   1643 		if (dst == NULL) \
   1644 			goto nospace; \
   1645 		op = dst + pos; \
   1646 		end = dst + len; \
   1647 	}
   1648 #endif
   1649 
   1650 	*dstp = dst = (wchar_t *)malloc(len * sizeof (wchar_t));
   1651 	if (dst == NULL)
   1652 		return (REG_ESPACE);
   1653 
   1654 	if (rp == NULL || rpl == NULL || src == NULL || dst ==  NULL)
   1655 		return (REG_EFATAL);
   1656 
   1657 	glob = 0;	/* match count */
   1658 	ip = src;	/* source position */
   1659 	op = dst;	/* destination position */
   1660 	end = dst + len;
   1661 
   1662 	flags = 0;
   1663 	while ((regerr = int_regwexec(rp, ip, NSUB, rm, flags)) == REG_OK) {
   1664 		/* Copy text preceding match */
   1665 		if (op + (i = rm[0].rm_sp - ip) >= end)
   1666 			OVERFLOW(i)
   1667 		while (i--)
   1668 			*op++ = *ip++;
   1669 
   1670 		if (iglob == 0 || ++glob == iglob) {
   1671 			oglob++;
   1672 			xp = rpl;		/* do substitute */
   1673 		} else
   1674 			xp = L"&";		/* preserve text */
   1675 
   1676 		/* Perform replacement of matched substing */
   1677 		while ((c = *xp++) != '\0') {
   1678 			rmp = NULL;
   1679 			if (c == '&')
   1680 				rmp = &rm[0];
   1681 			else if (c == '\\') {
   1682 				if ('0' <= *xp && *xp <= '9')
   1683 					rmp = &rm[*xp++ - '0'];
   1684 				else if (*xp != '\0')
   1685 					c = *xp++;
   1686 			}
   1687 
   1688 			if (rmp ==  NULL) {	/* Ordinary character. */
   1689 				*op++ = c;
   1690 				if (op >= end)
   1691 					OVERFLOW(1)
   1692 			} else if (rmp->rm_sp != NULL && rmp->rm_ep != NULL) {
   1693 				ip = rmp->rm_sp;
   1694 				if (op + (i = rmp->rm_ep - rmp->rm_sp) >= end)
   1695 					OVERFLOW(i)
   1696 				while (i--)
   1697 					*op++ = *ip++;
   1698 			}
   1699 		}
   1700 
   1701 		ip = rm[0].rm_ep;
   1702 		if (*ip == '\0')	/* If at end break */
   1703 			break;
   1704 		else if (rm[0].rm_sp == rm[0].rm_ep) {
   1705 			/* If empty match copy next char */
   1706 			*op++ = *ip++;
   1707 			if (op >= end)
   1708 				OVERFLOW(1)
   1709 		}
   1710 		flags = REG_NOTBOL;
   1711 	}
   1712 
   1713 	if (regerr != REG_OK && regerr != REG_NOMATCH)
   1714 		return (regerr);
   1715 
   1716 	/* Copy rest of text */
   1717 	if (op + (i =  wcslen(ip)) >= end)
   1718 		OVERFLOW(i)
   1719 	while (i--)
   1720 		*op++ = *ip++;
   1721 	*op++ = '\0';
   1722 
   1723 	if ((*dstp = dst = (wchar_t *)realloc(odst = dst,
   1724 	    sizeof (wchar_t) * (size_t)(op - dst))) == NULL) {
   1725 nospace:
   1726 		free(odst);
   1727 		return (REG_ESPACE);
   1728 	}
   1729 
   1730 	*globp = oglob;
   1731 
   1732 	return ((oglob == 0) ? REG_NOMATCH : REG_OK);
   1733 }
   1734