/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
     25 
/*
 * awk -- common header file.
 *
 * Copyright 1986, 1994 by Mortice Kern Systems Inc.  All rights reserved.
 *
 * This version uses the POSIX.2 compatible <regex.h> routines.
 *
 * Based on MKS awk(1), ported to be /usr/xpg4/bin/awk with POSIX/XCU4
 * changes.
 */
     36 
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <math.h>
#include <limits.h>
#include <stdlib.h>
#include <regex.h>
#include <errno.h>
#include <sys/types.h>
#include <locale.h>
#include <wchar.h>
#include <widec.h>
     51 
     52 #define	YYMAXDEPTH	300	/* Max # of productions (used by yacc) */
     53 #define	YYSSIZE		300	/* Size of State/Value stacks (MKS YACC) */
     54 #define	MAXDIGINT	19	/* Number of digits in an INT */
     55 #define	FNULL		((FILE *)0)
     56 #define	NNULL		((NODE *)0)
     57 #define	SNULL		((STRING)0)
     58 #define	LARGE		INT_MAX	/* Large integer */
     59 #define	NPFILE		32	/* Number of -[fl] options allowed */
     60 #define	NRECUR		3000	/* Maximum recursion depth */
     61 
     62 #define	M_LDATA	1
     63 #ifdef M_LDATA
     64 #define	NLINE	20000	/* Longest input record */
     65 #define	NFIELD	4000	/* Number of fields allowed */
     66 #define	NBUCKET	1024	/* # of symtab buckets (power of 2) */
     67 #else
     68 #define	NLINE	2048	/* Longest input record */
     69 #define	NFIELD	1024	/* Number of fields allowed */
     70 #define	NBUCKET	256	/* # of symtab buckets (power of 2) */
     71 #endif
     72 
     73 #define	NSNODE		40	/* Number of cached nodes */
     74 #define	NCONTEXT	50	/* Amount of context for error msgs */
     75 #define	hashbuck(n)	((n)&(NBUCKET-1))
     76 #if	BSD
     77 /*
     78  * A speedup for BSD.  Use their routines which are
     79  * already optimised.  Note that BSD bcopy does not
     80  * return a value.
     81  */
     82 int	bcmp();
     83 #define	memcmp(b1, b2, n)	bcmp(b1, b2, n)
     84 void	bcopy();
     85 #define	memcpy(b1, b2, n)	bcopy(b2, b1, (int)n)
     86 #endif	/* BSD */
     87 #define	vlook(n)	vlookup(n, 0)
     88 
     89 /*
     90  * Basic AWK internal types.
     91  */
     92 typedef	double		REAL;
     93 typedef	long long	INT;
     94 typedef	wchar_t		*STRING;
     95 typedef	struct NODE	*(*FUNCTION)(struct NODE *np);
     96 typedef	void		*REGEXP;
     97 
     98 /*
     99  * Node in the AWK interpreter expression tree.
    100  */
    101 typedef	struct	NODE	{
    102 	ushort_t	n_type;
    103 	struct NODE	*n_next;		/* Symbol table/PARM link */
    104 	ushort_t	n_flags;		/* Node flags, type */
    105 
    106 
    107 
    108 
    109 	union	{
    110 		struct	{
    111 			ushort_t	N_hash;		/* Full hash value */
    112 			struct NODE	*N_alink;	/* Array link */
    113 			union	{
    114 				struct	{
    115 					STRING	N_string;
    116 					size_t	N_strlen;
    117 				}	n_str;
    118 				INT	N_int;
    119 				REAL	N_real;
    120 				FUNCTION	N_function;
    121 				struct	NODE	*N_ufunc;
    122 			}	n_tun;
    123 			wchar_t	N_name[1];
    124 		}	n_term;
    125 		struct	{
    126 			struct	NODE	*N_left;
    127 			struct	NODE	*N_right;
    128 			ushort_t	N_lineno;
    129 		}	n_op;
    130 		struct {
    131 			struct	NODE	*N_left;	/* Used for fliplist */
    132 			struct	NODE	*N_right;
    133 			REGEXP		N_regexp;	/* Regular expression */
    134 		}	n_re;
    135 	}	n_un;
    136 }	NODE;
    137 
    138 /*
    139  * Definitions to make the node access much easier.
    140  */
    141 #define	n_hash		n_un.n_term.N_hash	/* full hash value is sym tbl */
    142 #define	n_scope		n_un.n_term.N_hash	/* local variable scope level */
    143 #define	n_alink		n_un.n_term.N_alink	/* link to array list */
    144 #define	n_string	n_un.n_term.n_tun.n_str.N_string
    145 #define	n_strlen	n_un.n_term.n_tun.n_str.N_strlen
    146 #define	n_int		n_un.n_term.n_tun.N_int
    147 #define	n_real		n_un.n_term.n_tun.N_real
    148 #define	n_function	n_un.n_term.n_tun.N_function
    149 #define	n_ufunc		n_un.n_term.n_tun.N_ufunc
    150 #define	n_name		n_un.n_term.N_name
    151 #define	n_left		n_un.n_op.N_left
    152 #define	n_right		n_un.n_op.N_right
    153 #define	n_lineno	n_un.n_op.N_lineno
    154 #define	n_keywtype	n_un.n_op.N_lineno
    155 #define	n_regexp	n_un.n_re.N_regexp
/*
 * Compress the types that are actually used in the final tree
 * to save space in the intermediate file.  This allows 1 byte to
 * represent all types.
 */
    161 
    162 
    163 
    164 
    165 
    166 
    167 
    168 /*
    169  * n_flags bit assignments.
    170  */
    171 #define	FALLOC		0x01	/* Allocated node */
    172 #define	FSTATIC		0x00	/* Not allocated */
    173 #define	FMATCH		0x02	/* pattern,pattern (first part matches) */
    174 #define	FSPECIAL	0x04	/* Special pre-computed variable */
    175 #define	FINARRAY	0x08	/* NODE installed in N_alink array list */
    176 #define	FNOALLOC	0x10	/* mark node FALLOC, but don't malloc */
    177 #define	FSENSE		0x20	/* Sense if string looks like INT/REAL */
    178 #define	FSAVE		(FSPECIAL|FINARRAY)	/* assign leaves on */
    179 
    180 #define	FINT		0x40	/* Node has integer type */
    181 #define	FREAL		0x80	/* Node has real type */
    182 #define	FSTRING		0x100	/* Node has string type */
    183 #define	FNONTOK		0x200	/* Node has non-token type */
    184 #define	FVINT		0x400	/* Node looks like an integer */
    185 #define	FVREAL		0x800	/* Node looks like a real number */
    186 #define	FLARRAY		0x1000	/* Local array node */
    187 
    188 /*
    189  * n_flags macros
    190  * These work when given an argument of np->n_flags
    191  */
    192 #define	isleaf(f)	(!((f)&FNONTOK))
    193 #define	isstring(f)	((f)&FSTRING)
    194 #define	isastring(f)	(((f)&(FSTRING|FALLOC)) == (FSTRING|FALLOC))
    195 #define	isnumber(f)	((f)&(FINT|FVINT|FREAL|FVREAL))
    196 #define	isreal(f)	((f)&(FREAL|FVREAL))
    197 #define	isint(f)	((f)&(FINT|FVINT))
    198 
/*
 * Prototype file size is defined in awksize.h.
 */
    202 
    203 
    204 
    205 
    206 
    207 /*
    208  * Awkrun prototype default name
    209  */
    210 #if defined(DOS)
    211 #if defined(__386__)
    212 #define	AWK_PROTOTYPE  M_ETCDIR(awkrunf.dos)
    213 #define	AWK_LPROTOTYPE M_ETCDIR(awkrunf.dos)
    214 #else
    215 #define	AWK_PROTOTYPE  M_ETCDIR(awkrun.dos)
    216 #define	AWK_LPROTOTYPE M_ETCDIR(awkrunl.dos)
    217 #endif
    218 #elif defined(OS2)
    219 #define	AWK_PROTOTYPE M_ETCDIR(awkrun.os2)
    220 #elif defined(NT)
    221 #define	AWK_PROTOTYPE M_ETCDIR(awkrun.nt)
    222 #else
    223 #define	AWK_PROTOTYPE M_ETCDIR(awkrun.mod)
    224 #endif
    225 
    226 /*
    227  * This is a kludge that gets around a bug in compact & large
    228  * models under DOS.  It also makes the generated
    229  * code faster even if there wasn't a bug.  UNIX people: try
    230  * to ignore these noisy "near" declarations.
    231  */
    232 #ifndef	DOS
    233 #define	near
    234 #endif
    235 
    236 typedef	wchar_t	near	*LOCCHARP;	/* pointer to local strings */
    237 /*
    238  * Form of builtin symbols
    239  * This should be a union because only one of r_ivalue
    240  * and r_svalue is needed, but (alas) unions cannot be
    241  * initialised.
    242  */
    243 typedef	struct	RESERVED {
    244 	LOCCHARP	r_name;
    245 	int		r_type;		/* Type of node */
    246 	INT		r_ivalue;	/* Integer value or wcslen(r_svalue) */
    247 	STRING		r_svalue;	/* String value */
    248 }	RESERVED;
    249 
    250 /*
    251  * Table of builtin functions.
    252  */
    253 typedef	struct	RESFUNC {
    254 	LOCCHARP	rf_name;
    255 	int		rf_type;	/* FUNC || GETLINE */
    256 	FUNCTION	rf_func;	/* Function pointer */
    257 }	RESFUNC;
    258 
    259 /*
    260  * Structure holding list of open files.
    261  */
    262 typedef	struct	OFILE	{
    263 	ushort_t f_mode;		/* Open mode: WRITE, APPEND, PIPE */
    264 	FILE	*f_fp;			/* File pointer if open */
    265 	char	*f_name;		/* Remembered file name */
    266 }	OFILE;
    267 
    268 /* Global functions -- awk.y */
    269 int	yyparse(void);
    270 
    271 /* Global functions -- awk1.c */
    272 #ifdef __WATCOMC__
    273 #pragma aux yyerror aborts;
    274 #pragma aux awkerr aborts;
    275 #pragma aux awkperr aborts;
    276 #endif
    277 void	yyerror(char *msg, ...);
    278 void	awkerr(char *fmt, ...);
    279 void	awkperr(char *fmt, ...);
    280 void	uexit(NODE *);
    281 int	yylex(void);
    282 NODE	*renode(wchar_t *restr);
    283 wchar_t	*emalloc(unsigned);
    284 wchar_t	*erealloc(wchar_t *, unsigned);
    285 
    286 /* Global functions -- awk2.c */
    287 void	awk(void);
    288 void	dobegin(void);
    289 void	doend(int status);
    290 int	nextrecord(wchar_t *buf, FILE *fp);
    291 wchar_t	*defrecord(wchar_t *bp, int lim, FILE *fp);
    292 wchar_t	*charrecord(wchar_t *bp, int lim, FILE *fp);
    293 wchar_t	*multirecord(wchar_t *bp, int lim, FILE *fp);
    294 wchar_t	*whitefield(wchar_t **endp);
    295 wchar_t	*blackfield(wchar_t **endp);
    296 wchar_t	*refield(wchar_t **endp);
    297 void	s_print(NODE *np);
    298 void	s_prf(NODE *np);
    299 size_t	xprintf(NODE *np, FILE *fp, wchar_t **cp);
    300 void	awkclose(OFILE *op);
    301 
    302 /* Global functions -- awk3.c */
    303 void	strassign(NODE *np, STRING string, int flags, size_t length);
    304 NODE	*nassign(NODE *np, NODE *value);
    305 NODE	*assign(NODE *np, NODE *value);
    306 void	delarray(NODE *np);
    307 NODE	*node(int type, NODE *left, NODE *right);
    308 NODE	*intnode(INT i);
    309 NODE	*realnode(REAL r);
    310 NODE	*stringnode(STRING str, int aflag, size_t wcslen);
    311 NODE	*vlookup(wchar_t *name, int nocreate);
    312 NODE	*emptynode(int type, size_t nlength);
    313 void	freenode(NODE *np);
    314 void	execute(NODE *np);
    315 INT	exprint(NODE *np);
    316 REAL	exprreal(NODE *np);
    317 STRING	exprstring(NODE *np);
    318 STRING	strsave(wchar_t *string);
    319 NODE	*exprreduce(NODE *np);
    320 NODE	*getlist(NODE **npp);
    321 NODE	*symwalk(int *buckp, NODE **npp);
    322 REGEXP	getregexp(NODE *np);
    323 void	addsymtab(NODE *np);
    324 void	delsymtab(NODE *np, int fflag);
    325 NODE	* finstall(LOCCHARP name, FUNCTION f, int type);
    326 void	kinstall(LOCCHARP name, int type);
    327 void	fieldsplit(void);
    328 void	promote(NODE *);
    329 
    330 
    331 
    332 
    333 
    334 
    335 
    336 /* Global functions -- awk4.c */
    337 NODE	*f_exp(NODE *np);
    338 NODE	*f_int(NODE *np);
    339 NODE	*f_log(NODE *np);
    340 NODE	*f_sqrt(NODE *np);
    341 NODE	*f_getline(NODE *np);
    342 NODE	*f_index(NODE *np);
    343 NODE	*f_length(NODE *np);
    344 NODE	*f_split(NODE *np);
    345 NODE	*f_sprintf(NODE *np);
    346 NODE	*f_substr(NODE *np);
    347 NODE	*f_rand(NODE *np);
    348 NODE	*f_srand(NODE *np);
    349 NODE	*f_sin(NODE *np);
    350 NODE	*f_cos(NODE *np);
    351 NODE	*f_atan2(NODE *np);
    352 NODE	*f_sub(NODE *np);
    353 NODE	*f_gsub(NODE *np);
    354 NODE	*f_match(NODE *np);
    355 NODE	*f_system(NODE *np);
    356 NODE	*f_ord(NODE *np);
    357 NODE	*f_tolower(NODE *np);
    358 NODE	*f_toupper(NODE *np);
    359 NODE	*f_close(NODE *np);
    360 NODE	*f_asort(NODE *np);
    361 
    362 /* In awk0.c */
    363 
    364 
    365 
    366 extern	wchar_t	_null[];
    367 extern	char	r[];
    368 extern	char	w[];
    369 extern	wchar_t	s_OFMT[];
    370 extern	wchar_t	s_CONVFMT[];
    371 extern	wchar_t	s_NR[];
    372 extern	wchar_t	s_NF[];
    373 extern	wchar_t	s_OFS[];
    374 extern	wchar_t	s_ORS[];
    375 extern	wchar_t	s_RS[];
    376 extern	wchar_t	s_FS[];
    377 extern	wchar_t	s_FNR[];
    378 extern	wchar_t	s_SUBSEP[];
    379 extern	wchar_t	s_ARGC[], s_ARGV[], s_ENVIRON[];
    380 extern	wchar_t	s_FILENAME[], s_SYMTAB[];
    381 extern	wchar_t	s_BEGIN[], s_END[], s_next[];
    382 extern	wchar_t	_begin[], _end[];
    383 extern	wchar_t	s_exp[], s_getline[], s_index[], s_int[], s_length[], s_log[];
    384 extern	wchar_t	s_split[], s_sprintf[], s_sqrt[], s_substr[];
    385 extern	wchar_t	s_rand[], s_srand[], s_sin[], s_cos[], s_atan2[];
    386 extern	wchar_t	s_sub[], s_gsub[], s_match[], s_system[], s_ord[];
    387 extern	wchar_t	s_toupper[], s_tolower[], s_asort[];
    388 extern	wchar_t	s_close[];
    389 extern	wchar_t	redelim;
    390 extern	unsigned char	inprint;
    391 extern	unsigned char	funparm;
    392 extern	unsigned char	splitdone;
    393 extern	uint_t	npattern;
    394 extern	uint_t	nfield;
    395 extern	uint_t	fcount;
    396 extern	uint_t	phase;
    397 extern	uint_t	running;
    398 extern	uchar_t	catterm;
    399 extern	uint_t	lexlast;
    400 extern	uint_t	lineno;
    401 extern	uchar_t	needsplit, needenviron, doing_begin, begin_getline;
    402 extern	ushort_t	slevel;
    403 extern	ushort_t	loopexit;
    404 extern	wchar_t	radixpoint;
    405 extern	REGEXP	resep;
    406 extern	RESERVED	reserved[];
    407 extern	RESFUNC		resfuncs[];
    408 extern	long	NIOSTREAM;	/* Maximum open I/O streams */
    409 extern	OFILE	*ofiles;
    410 extern	wchar_t	*linebuf;
    411 extern	size_t	lbuflen;
    412 extern	char	interr[];
    413 extern	char	nomem[];
    414 extern	NODE	*symtab[NBUCKET];
    415 extern	NODE	*yytree;
    416 extern	NODE	*freelist;
    417 extern	wchar_t	*(*awkrecord)(wchar_t *, int, FILE *);
    418 extern	wchar_t	*(*awkfield)(wchar_t **);
    419 
    420 extern	NODE	*constant;
    421 extern	NODE	*const0;
    422 extern	NODE	*const1;
    423 extern	NODE	*constundef;
    424 extern	NODE	*field0;
    425 extern	NODE	*incNR;
    426 extern	NODE	*incFNR;
    427 extern	NODE	*clrFNR;
    428 extern	NODE	*ARGVsubi;
    429 extern	NODE	*varNR;
    430 extern	NODE	*varFNR;
    431 extern	NODE	*varNF;
    432 extern	NODE	*varOFMT;
    433 extern	NODE	*varCONVFMT;
    434 extern	NODE	*varOFS;
    435 extern	NODE	*varORS;
    436 extern	NODE	*varFS;
    437 extern	NODE	*varRS;
    438 extern	NODE	*varARGC;
    439 extern	NODE	*varSUBSEP;
    440 extern	NODE	*varENVIRON;
    441 extern	NODE	*varSYMTAB;
    442 extern	NODE	*varFILENAME;
    443 extern	NODE	*curnode;
    444 extern	NODE    *inc_oper;
    445 extern	NODE	*asn_oper;
    446 
    447 extern char *mbunconvert(wchar_t *);
    448 extern	wchar_t 	*mbstowcsdup(char *);
    449 extern	char		*wcstombsdup(wchar_t *);
    450 extern	void		awkerr(char *, ...);
    451 /*
    452  * The following defines the expected max length in chars of a printed number.
    453  * This should be the longest expected size for any type of number
    454  * ie. float, long etc. This number is used to calculate the approximate
    455  * number of chars needed to hold the number.
    456  */
    457 #ifdef M_NUMSIZE
    458 #define	NUMSIZE M_NUMSIZE
    459 #else
    460 #define	NUMSIZE 30
    461 #endif
    462 
    463 #define	M_MB_L(s)	L##s
    464 #ifdef  __STDC__
    465 #define	ANSI(x) x
    466 #else
    467 #define	const
    468 #define	signed
    469 #define	volatile
    470 #define	ANSI(x) ()
    471 #endif
    472 
    473 #define	isWblank(x) (((x) == ' ' || (x) == '\t') ? 1 : 0)
    474 
    475 
    476 /*
    477  * Wide character version of regular expression functions.
    478  */
    479 #define	REGWMATCH_T	int_regwmatch_t
    480 #define	REGWCOMP	int_regwcomp
    481 #define	REGWEXEC	int_regwexec
    482 #define	REGWFREE	int_regwfree
    483 #define	REGWERROR	int_regwerror
    484 #define	REGWDOSUBA	int_regwdosuba
    485 
    486 typedef struct {
    487 	const wchar_t	*rm_sp, *rm_ep;
    488 	regoff_t	rm_so, rm_eo;
    489 } int_regwmatch_t;
    490 
    491 extern int int_regwcomp(REGEXP *, const wchar_t *);
    492 extern int int_regwexec(REGEXP, const wchar_t *, size_t,
    493 			int_regwmatch_t *, int);
    494 extern void int_regwfree(REGEXP);
    495 extern size_t int_regwerror(int, REGEXP, char *, size_t);
    496 extern int int_regwdosuba(REGEXP, const wchar_t *,
    497 			const wchar_t *, wchar_t **, int, int *);
    498