Home | History | Annotate | Download | only in awk
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     31 
     32 #define	DEBUG
     33 #include <stdio.h>
     34 #include <stdlib.h>
     35 #include <ctype.h>
     36 #include <string.h>
     37 #include "awk.h"
     38 #include "y.tab.h"
     39 
     40 #define	FULLTAB	2	/* rehash when table gets this x full */
     41 #define	GROWTAB 4	/* grow table by this factor */
     42 
     43 Array	*symtab;	/* main symbol table */
     44 
     45 uchar	**FS;		/* initial field sep */
     46 uchar	**RS;		/* initial record sep */
     47 uchar	**OFS;		/* output field sep */
     48 uchar	**ORS;		/* output record sep */
     49 uchar	**OFMT;		/* output format for numbers */
     50 Awkfloat *NF;		/* number of fields in current record */
     51 Awkfloat *NR;		/* number of current record */
     52 Awkfloat *FNR;		/* number of current record in current file */
     53 uchar	**FILENAME;	/* current filename argument */
     54 Awkfloat *ARGC;		/* number of arguments from command line */
     55 uchar	**SUBSEP;	/* subscript separator for a[i,j,k]; default \034 */
     56 Awkfloat *RSTART;	/* start of re matched with ~; origin 1 (!) */
     57 Awkfloat *RLENGTH;	/* length of same */
     58 
     59 Cell	*recloc;	/* location of record */
     60 Cell	*nrloc;		/* NR */
     61 Cell	*nfloc;		/* NF */
     62 Cell	*fnrloc;	/* FNR */
     63 Array	*ARGVtab;	/* symbol table containing ARGV[...] */
     64 Array	*ENVtab;	/* symbol table containing ENVIRON[...] */
     65 Cell	*rstartloc;	/* RSTART */
     66 Cell	*rlengthloc;	/* RLENGTH */
     67 Cell	*symtabloc;	/* SYMTAB */
     68 
     69 Cell	*nullloc;
     70 Node	*nullnode;	/* zero&null, converted into a node for comparisons */
     71 
     72 static	void	rehash(Array *);
     73 
     74 void
     75 syminit(void)
     76 {
     77 	init_buf(&record, &record_size, LINE_INCR);
     78 
     79 	/* initialize $0 */
     80 	recloc = getfld(0);
     81 	recloc->nval = (uchar *)"$0";
     82 	recloc->sval = record;
     83 	recloc->tval = REC|STR|DONTFREE;
     84 
     85 	symtab = makesymtab(NSYMTAB);
     86 	(void) setsymtab((uchar *)"0", (uchar *)"0", 0.0,
     87 	    NUM|STR|CON|DONTFREE, symtab);
     88 	/* this is used for if(x)... tests: */
     89 	nullloc = setsymtab((uchar *)"$zero&null", (uchar *)"", 0.0,
     90 	    NUM|STR|CON|DONTFREE, symtab);
     91 	nullnode = valtonode(nullloc, CCON);
     92 	FS = &setsymtab((uchar *)"FS", (uchar *)" ", 0.0,
     93 	    STR|DONTFREE, symtab)->sval;
     94 	RS = &setsymtab((uchar *)"RS", (uchar *)"\n", 0.0,
     95 	    STR|DONTFREE, symtab)->sval;
     96 	OFS = &setsymtab((uchar *)"OFS", (uchar *)" ", 0.0,
     97 	    STR|DONTFREE, symtab)->sval;
     98 	ORS = &setsymtab((uchar *)"ORS", (uchar *)"\n", 0.0,
     99 	    STR|DONTFREE, symtab)->sval;
    100 	OFMT = &setsymtab((uchar *)"OFMT", (uchar *)"%.6g", 0.0,
    101 	    STR|DONTFREE, symtab)->sval;
    102 	FILENAME = &setsymtab((uchar *)"FILENAME", (uchar *)"-", 0.0,
    103 	    STR|DONTFREE, symtab)->sval;
    104 	nfloc = setsymtab((uchar *)"NF", (uchar *)"", 0.0, NUM, symtab);
    105 	NF = &nfloc->fval;
    106 	nrloc = setsymtab((uchar *)"NR", (uchar *)"", 0.0, NUM, symtab);
    107 	NR = &nrloc->fval;
    108 	fnrloc = setsymtab((uchar *)"FNR", (uchar *)"", 0.0, NUM, symtab);
    109 	FNR = &fnrloc->fval;
    110 	SUBSEP = &setsymtab((uchar *)"SUBSEP", (uchar *)"\034", 0.0,
    111 	    STR|DONTFREE, symtab)->sval;
    112 	rstartloc = setsymtab((uchar *)"RSTART", (uchar *)"", 0.0,
    113 	    NUM, symtab);
    114 	RSTART = &rstartloc->fval;
    115 	rlengthloc = setsymtab((uchar *)"RLENGTH", (uchar *)"", 0.0,
    116 	    NUM, symtab);
    117 	RLENGTH = &rlengthloc->fval;
    118 	symtabloc = setsymtab((uchar *)"SYMTAB", (uchar *)"", 0.0, ARR, symtab);
    119 	symtabloc->sval = (uchar *)symtab;
    120 }
    121 
    122 void
    123 arginit(int ac, uchar *av[])
    124 {
    125 	Cell *cp;
    126 	int i;
    127 	uchar temp[11];
    128 
    129 	/* first make FILENAME first real argument */
    130 	for (i = 1; i < ac; i++) {
    131 		if (!isclvar(av[i])) {
    132 			(void) setsval(lookup((uchar *)"FILENAME", symtab),
    133 			    av[i]);
    134 			break;
    135 		}
    136 	}
    137 	ARGC = &setsymtab((uchar *)"ARGC", (uchar *)"", (Awkfloat)ac,
    138 	    NUM, symtab)->fval;
    139 	cp = setsymtab((uchar *)"ARGV", (uchar *)"", 0.0, ARR, symtab);
    140 	ARGVtab = makesymtab(NSYMTAB);	/* could be (int) ARGC as well */
    141 	cp->sval = (uchar *) ARGVtab;
    142 	for (i = 0; i < ac; i++) {
    143 		(void) sprintf((char *)temp, "%d", i);
    144 		if (is_number(*av)) {
    145 			(void) setsymtab(temp, *av, atof((const char *)*av),
    146 			    STR|NUM, ARGVtab);
    147 		} else {
    148 			(void) setsymtab(temp, *av, 0.0, STR, ARGVtab);
    149 		}
    150 		av++;
    151 	}
    152 }
    153 
    154 void
    155 envinit(uchar *envp[])
    156 {
    157 	Cell *cp;
    158 	uchar *p;
    159 
    160 	cp = setsymtab((uchar *)"ENVIRON", (uchar *)"", 0.0, ARR, symtab);
    161 	ENVtab = makesymtab(NSYMTAB);
    162 	cp->sval = (uchar *) ENVtab;
    163 	for (; *envp; envp++) {
    164 		if ((p = (uchar *)strchr((char *)*envp, '=')) == NULL)
    165 			continue;
    166 		*p++ = 0;	/* split into two strings at = */
    167 		if (is_number(p)) {
    168 			(void) setsymtab(*envp, p, atof((const char *)p),
    169 			    STR|NUM, ENVtab);
    170 		} else {
    171 			(void) setsymtab(*envp, p, 0.0, STR, ENVtab);
    172 		}
    173 		/* restore in case env is passed down to a shell */
    174 		p[-1] = '=';
    175 	}
    176 }
    177 
    178 Array *
    179 makesymtab(int n)
    180 {
    181 	Array *ap;
    182 	Cell **tp;
    183 
    184 	ap = (Array *)malloc(sizeof (Array));
    185 	tp = (Cell **)calloc(n, sizeof (Cell *));
    186 	if (ap == NULL || tp == NULL)
    187 		ERROR "out of space in makesymtab" FATAL;
    188 	ap->nelem = 0;
    189 	ap->size = n;
    190 	ap->tab = tp;
    191 	return (ap);
    192 }
    193 
    194 void
    195 freesymtab(Cell *ap)	/* free symbol table */
    196 {
    197 	Cell *cp, *next;
    198 	Array *tp;
    199 	int i;
    200 
    201 	if (!isarr(ap))
    202 		return;
    203 	/*LINTED align*/
    204 	tp = (Array *)ap->sval;
    205 	if (tp == NULL)
    206 		return;
    207 	for (i = 0; i < tp->size; i++) {
    208 		for (cp = tp->tab[i]; cp != NULL; cp = next) {
    209 			next = cp->cnext;
    210 			xfree(cp->nval);
    211 			if (freeable(cp))
    212 				xfree(cp->sval);
    213 			free(cp);
    214 		}
    215 	}
    216 	free(tp->tab);
    217 	free(tp);
    218 }
    219 
    220 void
    221 freeelem(Cell *ap, uchar *s)		/* free elem s from ap (i.e., ap["s"] */
    222 {
    223 	Array *tp;
    224 	Cell *p, *prev = NULL;
    225 	int h;
    226 
    227 	/*LINTED align*/
    228 	tp = (Array *)ap->sval;
    229 	h = hash(s, tp->size);
    230 	for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
    231 		if (strcmp((char *)s, (char *)p->nval) == 0) {
    232 			if (prev == NULL)	/* 1st one */
    233 				tp->tab[h] = p->cnext;
    234 			else			/* middle somewhere */
    235 				prev->cnext = p->cnext;
    236 			if (freeable(p))
    237 				xfree(p->sval);
    238 			free(p->nval);
    239 			free(p);
    240 			tp->nelem--;
    241 			return;
    242 		}
    243 }
    244 
    245 Cell *
    246 setsymtab(uchar *n, uchar *s, Awkfloat f, unsigned int t, Array *tp)
    247 {
    248 	register int h;
    249 	register Cell *p;
    250 
    251 	if (n != NULL && (p = lookup(n, tp)) != NULL) {
    252 		dprintf(("setsymtab found %p: n=%s", (void *)p, p->nval));
    253 		dprintf((" s=\"%s\" f=%g t=%p\n",
    254 		    p->sval, p->fval, (void *)p->tval));
    255 		return (p);
    256 	}
    257 	p = (Cell *)malloc(sizeof (Cell));
    258 	if (p == NULL)
    259 		ERROR "symbol table overflow at %s", n FATAL;
    260 	p->nval = tostring(n);
    261 	p->sval = s ? tostring(s) : tostring((uchar *)"");
    262 	p->fval = f;
    263 	p->tval = t;
    264 	p->csub = 0;
    265 
    266 	tp->nelem++;
    267 	if (tp->nelem > FULLTAB * tp->size)
    268 		rehash(tp);
    269 	h = hash(n, tp->size);
    270 	p->cnext = tp->tab[h];
    271 	tp->tab[h] = p;
    272 	dprintf(("setsymtab set %p: n=%s", (void *)p, p->nval));
    273 	dprintf((" s=\"%s\" f=%g t=%p\n", p->sval, p->fval, (void *)p->tval));
    274 	return (p);
    275 }
    276 
    277 int
    278 hash(uchar *s, int n)	/* form hash value for string s */
    279 {
    280 	register unsigned hashval;
    281 
    282 	for (hashval = 0; *s != '\0'; s++)
    283 		hashval = (*s + 31 * hashval);
    284 	return (hashval % n);
    285 }
    286 
    287 static void
    288 rehash(Array *tp)	/* rehash items in small table into big one */
    289 {
    290 	int i, nh, nsz;
    291 	Cell *cp, *op, **np;
    292 
    293 	nsz = GROWTAB * tp->size;
    294 	np = (Cell **)calloc(nsz, sizeof (Cell *));
    295 	if (np == NULL)
    296 		ERROR "out of space in rehash" FATAL;
    297 	for (i = 0; i < tp->size; i++) {
    298 		for (cp = tp->tab[i]; cp; cp = op) {
    299 			op = cp->cnext;
    300 			nh = hash(cp->nval, nsz);
    301 			cp->cnext = np[nh];
    302 			np[nh] = cp;
    303 		}
    304 	}
    305 	free(tp->tab);
    306 	tp->tab = np;
    307 	tp->size = nsz;
    308 }
    309 
    310 Cell *
    311 lookup(uchar *s, Array *tp)	/* look for s in tp */
    312 {
    313 	register Cell *p;
    314 	int h;
    315 
    316 	h = hash(s, tp->size);
    317 	for (p = tp->tab[h]; p != NULL; p = p->cnext) {
    318 		if (strcmp((char *)s, (char *)p->nval) == 0)
    319 			return (p);	/* found it */
    320 	}
    321 	return (NULL);			/* not found */
    322 }
    323 
    324 Awkfloat
    325 setfval(Cell *vp, Awkfloat f)
    326 {
    327 	int	i;
    328 
    329 	if ((vp->tval & (NUM | STR)) == 0)
    330 		funnyvar(vp, "assign to");
    331 	if (vp->tval & FLD) {
    332 		donerec = 0;	/* mark $0 invalid */
    333 		i = fldidx(vp);
    334 		if (i > *NF)
    335 			newfld(i);
    336 		dprintf(("setting field %d to %g\n", i, f));
    337 	} else if (vp->tval & REC) {
    338 		donefld = 0;	/* mark $1... invalid */
    339 		donerec = 1;
    340 	}
    341 	vp->tval &= ~STR;	/* mark string invalid */
    342 	vp->tval |= NUM;	/* mark number ok */
    343 	dprintf(("setfval %p: %s = %g, t=%p\n", (void *)vp,
    344 	    vp->nval ? vp->nval : (unsigned char *)"NULL",
    345 	    f, (void *)vp->tval));
    346 	return (vp->fval = f);
    347 }
    348 
    349 void
    350 funnyvar(Cell *vp, char *rw)
    351 {
    352 	if (vp->tval & ARR)
    353 		ERROR "can't %s %s; it's an array name.", rw, vp->nval FATAL;
    354 	if (vp->tval & FCN)
    355 		ERROR "can't %s %s; it's a function.", rw, vp->nval FATAL;
    356 	ERROR "funny variable %o: n=%s s=\"%s\" f=%g t=%o",
    357 	    vp, vp->nval, vp->sval, vp->fval, vp->tval CONT;
    358 }
    359 
    360 uchar *
    361 setsval(Cell *vp, uchar *s)
    362 {
    363 	int	i;
    364 
    365 	if ((vp->tval & (NUM | STR)) == 0)
    366 		funnyvar(vp, "assign to");
    367 	if (vp->tval & FLD) {
    368 		donerec = 0;	/* mark $0 invalid */
    369 		i = fldidx(vp);
    370 		if (i > *NF)
    371 			newfld(i);
    372 		dprintf(("setting field %d to %s\n", i, s));
    373 	} else if (vp->tval & REC) {
    374 		donefld = 0;	/* mark $1... invalid */
    375 		donerec = 1;
    376 	}
    377 	vp->tval &= ~NUM;
    378 	vp->tval |= STR;
    379 	if (freeable(vp))
    380 		xfree(vp->sval);
    381 	vp->tval &= ~DONTFREE;
    382 	dprintf(("setsval %p: %s = \"%s\", t=%p\n",
    383 	    (void *)vp,
    384 	    vp->nval ? (char *)vp->nval : "",
    385 	    s,
    386 	    (void *)(vp->tval ? (char *)vp->tval : "")));
    387 	return (vp->sval = tostring(s));
    388 }
    389 
    390 Awkfloat
    391 r_getfval(Cell *vp)
    392 {
    393 	if ((vp->tval & (NUM | STR)) == 0)
    394 		funnyvar(vp, "read value of");
    395 	if ((vp->tval & FLD) && donefld == 0)
    396 		fldbld();
    397 	else if ((vp->tval & REC) && donerec == 0)
    398 		recbld();
    399 	if (!isnum(vp)) {	/* not a number */
    400 		vp->fval = atof((const char *)vp->sval);	/* best guess */
    401 		if (is_number(vp->sval) && !(vp->tval&CON))
    402 			vp->tval |= NUM;	/* make NUM only sparingly */
    403 	}
    404 	dprintf(("getfval %p: %s = %g, t=%p\n",
    405 	    (void *)vp, vp->nval, vp->fval, (void *)vp->tval));
    406 	return (vp->fval);
    407 }
    408 
    409 uchar *
    410 r_getsval(Cell *vp)
    411 {
    412 	uchar s[256];
    413 
    414 	if ((vp->tval & (NUM | STR)) == 0)
    415 		funnyvar(vp, "read value of");
    416 	if ((vp->tval & FLD) && donefld == 0)
    417 		fldbld();
    418 	else if ((vp->tval & REC) && donerec == 0)
    419 		recbld();
    420 	if ((vp->tval & STR) == 0) {
    421 		if (!(vp->tval&DONTFREE))
    422 			xfree(vp->sval);
    423 		if ((long long)vp->fval == vp->fval) {
    424 			(void) snprintf((char *)s, sizeof (s),
    425 			    "%.20g", vp->fval);
    426 		} else {
    427 			/*LINTED*/
    428 			(void) snprintf((char *)s, sizeof (s),
    429 			    (char *)*OFMT, vp->fval);
    430 		}
    431 		vp->sval = tostring(s);
    432 		vp->tval &= ~DONTFREE;
    433 		vp->tval |= STR;
    434 	}
    435 	dprintf(("getsval %p: %s = \"%s\", t=%p\n",
    436 	    (void *)vp,
    437 	    vp->nval ? (char *)vp->nval : "",
    438 	    vp->sval ? (char *)vp->sval : "",
    439 	    (void *)vp->tval));
    440 	return (vp->sval);
    441 }
    442 
    443 uchar *
    444 tostring(uchar *s)
    445 {
    446 	register uchar *p;
    447 
    448 	p = (uchar *)malloc(strlen((char *)s)+1);
    449 	if (p == NULL)
    450 		ERROR "out of space in tostring on %s", s FATAL;
    451 	(void) strcpy((char *)p, (char *)s);
    452 	return (p);
    453 }
    454 
    455 uchar *
    456 qstring(uchar *s, int delim)	/* collect string up to delim */
    457 {
    458 	uchar *cbuf, *ret;
    459 	int c, n;
    460 	size_t	cbufsz, cnt;
    461 
    462 	init_buf(&cbuf, &cbufsz, LINE_INCR);
    463 
    464 	for (cnt = 0; (c = *s) != delim; s++) {
    465 		if (c == '\n') {
    466 			ERROR "newline in string %.10s...", cbuf SYNTAX;
    467 		} else if (c != '\\') {
    468 			expand_buf(&cbuf, &cbufsz, cnt);
    469 			cbuf[cnt++] = c;
    470 		} else {	/* \something */
    471 			expand_buf(&cbuf, &cbufsz, cnt);
    472 			switch (c = *++s) {
    473 			case '\\':	cbuf[cnt++] = '\\'; break;
    474 			case 'n':	cbuf[cnt++] = '\n'; break;
    475 			case 't':	cbuf[cnt++] = '\t'; break;
    476 			case 'b':	cbuf[cnt++] = '\b'; break;
    477 			case 'f':	cbuf[cnt++] = '\f'; break;
    478 			case 'r':	cbuf[cnt++] = '\r'; break;
    479 			default:
    480 				if (!isdigit(c)) {
    481 					cbuf[cnt++] = c;
    482 					break;
    483 				}
    484 				n = c - '0';
    485 				if (isdigit(s[1])) {
    486 					n = 8 * n + *++s - '0';
    487 					if (isdigit(s[1]))
    488 						n = 8 * n + *++s - '0';
    489 				}
    490 				cbuf[cnt++] = n;
    491 				break;
    492 			}
    493 		}
    494 	}
    495 	cbuf[cnt] = '\0';
    496 	ret = tostring(cbuf);
    497 	free(cbuf);
    498 	return (ret);
    499 }
    500