xref: /openbsd-src/usr.bin/awk/awk.h (revision fa9a54978073e7b3f09e0c805c7214dcdf61857c)
1*fa9a5497Smillert /*	$OpenBSD: awk.h,v 1.32 2024/06/03 00:58:04 millert Exp $	*/
26ab05f83Stholo /****************************************************************
307edfa4aSkstailey Copyright (C) Lucent Technologies 1997
46ab05f83Stholo All Rights Reserved
56ab05f83Stholo 
66ab05f83Stholo Permission to use, copy, modify, and distribute this software and
76ab05f83Stholo its documentation for any purpose and without fee is hereby
86ab05f83Stholo granted, provided that the above copyright notice appear in all
96ab05f83Stholo copies and that both that the copyright notice and this
106ab05f83Stholo permission notice and warranty disclaimer appear in supporting
1107edfa4aSkstailey documentation, and that the name Lucent Technologies or any of
1207edfa4aSkstailey its entities not be used in advertising or publicity pertaining
1307edfa4aSkstailey to distribution of the software without specific, written prior
1407edfa4aSkstailey permission.
156ab05f83Stholo 
1607edfa4aSkstailey LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1707edfa4aSkstailey INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
1807edfa4aSkstailey IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
1907edfa4aSkstailey SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
2007edfa4aSkstailey WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
2107edfa4aSkstailey IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
2207edfa4aSkstailey ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
2307edfa4aSkstailey THIS SOFTWARE.
246ab05f83Stholo ****************************************************************/
256ab05f83Stholo 
2623cb51abSmillert #include <assert.h>
27d7cce239Smillert #include <stdint.h>
28f81b289fSmillert #include <stdbool.h>
294edce374Smillert #if __STDC_VERSION__ <= 199901L
304edce374Smillert #define noreturn __dead
314edce374Smillert #else
324edce374Smillert #include <stdnoreturn.h>
334edce374Smillert #endif
3423cb51abSmillert 
/* Awkfloat: the floating-point type used for numeric values (a double). */
typedef double		Awkfloat;

/* unsigned char is more trouble than it's worth */

/* uschar: shorthand for unsigned char. */
typedef unsigned char	uschar;
406ab05f83Stholo 
/*
 * xfree(a): free the storage the lvalue `a' points to, then set `a' to NULL.
 * `a' is evaluated twice, so it must not have side effects.
 * The expansion is wrapped in do { } while (0) so that it behaves as one
 * statement and can be used unbraced in an if/else.
 */
#define	xfree(a)	do { free((void *)(intptr_t)(a)); (a) = NULL; } while (0)
/*
 * setptr(ptr, a): store the character `a' at the location `ptr' points to,
 * casting through intptr_t so that a const-qualified pointer is accepted.
 * We sometimes cheat this way to NUL-terminate a read-only string and
 * then put back the original value.
 */
#define setptr(ptr, a)	((char *)(intptr_t)(ptr))[0] = (a)
476ab05f83Stholo 
/*
 * NN(p): yields p, or the literal "(null)" when p is a null pointer, so the
 * result is guaranteed non-null for DPRINTF.  p is evaluated twice.
 */
#define	NN(p)	(!(p) ? "(null)" : (p))
/*
 * Debug tracing.  DEBUG is defined unconditionally here, so DPRINTF() is
 * always compiled in and calls printf() with its arguments whenever the
 * global `dbg' is nonzero.
 *
 * Both variants expand to a single do { } while (0) statement so that an
 * unbraced DPRINTF() inside an if/else cannot capture the else branch.
 */
#define	DEBUG
#ifdef	DEBUG
#	define	DPRINTF(...)	do { if (dbg) printf(__VA_ARGS__); } while (0)
#else
#	define	DPRINTF(...)	do { } while (0)
#endif
566ab05f83Stholo 
/* Possible values of the global compile_time. */
enum compile_states {
	RUNNING,
	COMPILING,
	ERROR_PRINTING
};

extern enum compile_states compile_time;
62f81b289fSmillert 
/* false => unsafe, true => safe */
extern bool	safe;
/* true if POSIXLY_CORRECT set */
extern bool	do_posix;

/* sets limit on records, fields, etc., etc. */
#define	RECSIZE	(8 * 1024)
/* size of current record, orig RECSIZE */
extern int	recsize;

/* max size of a multi-byte character */
extern size_t	awk_mb_cur_max;
70c35264f9Smillert 
71fabd211eSmillert extern char	EMPTY[];	/* this avoid -Wwritable-strings issues */
726ab05f83Stholo extern char	**FS;
736ab05f83Stholo extern char	**RS;
746ab05f83Stholo extern char	**ORS;
756ab05f83Stholo extern char	**OFS;
766ab05f83Stholo extern char	**OFMT;
776ab05f83Stholo extern Awkfloat *NR;
786ab05f83Stholo extern Awkfloat *FNR;
796ab05f83Stholo extern Awkfloat *NF;
806ab05f83Stholo extern char	**FILENAME;
816ab05f83Stholo extern char	**SUBSEP;
826ab05f83Stholo extern Awkfloat *RSTART;
836ab05f83Stholo extern Awkfloat *RLENGTH;
846ab05f83Stholo 
85a886e62eSmillert extern bool	CSV;		/* true for csv input */
86a886e62eSmillert 
876ab05f83Stholo extern char	*record;	/* points to $0 */
886ab05f83Stholo extern int	lineno;		/* line number in awk program */
896ab05f83Stholo extern int	errorflag;	/* 1 if error has occurred */
90f81b289fSmillert extern bool	donefld;	/* true if record broken into fields */
91f81b289fSmillert extern bool	donerec;	/* true if record is valid (no fld has changed */
926ab05f83Stholo extern int	dbg;
936ab05f83Stholo 
94d7cce239Smillert extern const char *patbeg;	/* beginning of pattern matched */
956ab05f83Stholo extern	int	patlen;		/* length of pattern matched.  set in b.c */
966ab05f83Stholo 
976ab05f83Stholo /* Cell:  all information about a variable or constant */
986ab05f83Stholo 
996ab05f83Stholo typedef struct Cell {
1006ab05f83Stholo 	uschar	ctype;		/* OCELL, OBOOL, OJUMP, etc. */
1016ab05f83Stholo 	uschar	csub;		/* CCON, CTEMP, CFLD, etc. */
1026ab05f83Stholo 	char	*nval;		/* name, for variables only */
1036ab05f83Stholo 	char	*sval;		/* string value */
1046ab05f83Stholo 	Awkfloat fval;		/* value as number */
105c062391aSmillert 	int	 tval;		/* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */
106c062391aSmillert 	char	*fmt;		/* CONVFMT/OFMT value used to convert from number */
1076ab05f83Stholo 	struct Cell *cnext;	/* ptr to next if chained */
1086ab05f83Stholo } Cell;
1096ab05f83Stholo 
11007edfa4aSkstailey typedef struct Array {		/* symbol table array */
1116ab05f83Stholo 	int	nelem;		/* elements in table right now */
1126ab05f83Stholo 	int	size;		/* size of tab */
1136ab05f83Stholo 	Cell	**tab;		/* hash table pointers */
1146ab05f83Stholo } Array;
1156ab05f83Stholo 
1166ab05f83Stholo #define	NSYMTAB	50	/* initial size of a symbol table */
1176ab05f83Stholo extern Array	*symtab;
1186ab05f83Stholo 
1196ab05f83Stholo extern Cell	*nrloc;		/* NR */
1206ab05f83Stholo extern Cell	*fnrloc;	/* FNR */
12102265e66Smillert extern Cell	*fsloc;		/* FS */
1226ab05f83Stholo extern Cell	*nfloc;		/* NF */
12302265e66Smillert extern Cell	*ofsloc;	/* OFS */
12402265e66Smillert extern Cell	*orsloc;	/* ORS */
12502265e66Smillert extern Cell	*rsloc;		/* RS */
1266ab05f83Stholo extern Cell	*rstartloc;	/* RSTART */
1276ab05f83Stholo extern Cell	*rlengthloc;	/* RLENGTH */
12802265e66Smillert extern Cell	*subseploc;	/* SUBSEP */
129203f9af3Smillert extern Cell	*symtabloc;	/* SYMTAB */
1306ab05f83Stholo 
/* Cell.tval values: single-bit flags that may be OR-ed together */
#define	NUM		0x001	/* number value is valid */
#define	STR		0x002	/* string value is valid */
#define	DONTFREE	0x004	/* string space is not freeable */
#define	CON		0x008	/* this is a constant */
#define	ARR		0x010	/* this is an array */
#define	FCN		0x020	/* this is a function name */
#define	FLD		0x040	/* this is a field $1, $2, ... */
#define	REC		0x080	/* this is $0 */
#define	CONVC		0x100	/* string was converted from number via CONVFMT */
#define	CONVO		0x200	/* string was converted from number via OFMT */
1426ab05f83Stholo 
1436ab05f83Stholo 
/* function types: consecutive integer codes, 1 through 23 */
#define	FLENGTH		1
#define	FSQRT		2
#define	FEXP		3
#define	FLOG		4
#define	FINT		5
#define	FSYSTEM		6
#define	FRAND		7
#define	FSRAND		8
#define	FSIN		9
#define	FCOS		10
#define	FATAN		11
#define	FTOUPPER	12
#define	FTOLOWER	13
#define	FFLUSH		14
#define	FAND		15
#define	FFOR		16
#define	FXOR		17
#define	FCOMPL		18
#define	FLSHIFT		19
#define	FRSHIFT		20
#define	FSYSTIME	21
#define	FSTRFTIME	22
#define	FMKTIME		23
1686ab05f83Stholo 
/* Node:  parse tree is made of nodes, with Cell's at bottom */

typedef struct Node Node;

struct Node {
	int	ntype;
	Node	*nnext;
	int	lineno;
	int	nobj;
	/* variable: actual size set by calling malloc */
	Node	*narg[1];
};

/* NIL: the null Node pointer */
#define	NIL	((Node *) 0)

extern Node	*winner;
extern Node	*nullnode;
1836ab05f83Stholo 
/* ctypes */
#define	OCELL	1
#define	OBOOL	2
#define	OJUMP	3

/* Cell subtypes: csub */
#define	CUNK	0
#define	CFLD	1
#define	CVAR	2
#define	CNAME	3
#define	CTEMP	4
#define	CCON	5
#define	CCOPY	6
#define	CFREE	7

/* bool subtypes */
#define	BTRUE	11
#define	BFALSE	12

/* jump subtypes */
#define	JEXIT		21
#define	JNEXT		22
#define	JBREAK		23
#define	JCONT		24
#define	JRET		25
#define	JNEXTFILE	26

/* node types */
#define	NVALUE	1
#define	NSTAT	2
#define	NEXPR	3
2166ab05f83Stholo 
extern	int	pairstack[];
extern	int	paircnt;

/*
 * notlegal(n): true when n lies outside the open interval
 * (FIRSTTOKEN, LASTTOKEN) or when its proctab slot is nullproc.
 * n is parenthesized at every use so that an expression argument
 * (e.g. "x & 0xff") is compared and indexed as a whole.
 */
#define notlegal(n)	((n) <= FIRSTTOKEN || (n) >= LASTTOKEN || \
			    proctab[(n)-FIRSTTOKEN] == nullproc)

/* tests on the ntype member */
#define isvalue(n)	((n)->ntype == NVALUE)
#define isexpr(n)	((n)->ntype == NEXPR)

/* test on the ctype member */
#define isjump(n)	((n)->ctype == OJUMP)

/* tests on the csub member */
#define isexit(n)	((n)->csub == JEXIT)
#define	isbreak(n)	((n)->csub == JBREAK)
#define	iscont(n)	((n)->csub == JCONT)
#define	isnext(n)	((n)->csub == JNEXT || (n)->csub == JNEXTFILE)
#define	isret(n)	((n)->csub == JRET)
#define istrue(n)	((n)->csub == BTRUE)
#define istemp(n)	((n)->csub == CTEMP)

/* tests on single tval bits; the result is the masked bit, not 0/1 */
#define isrec(n)	((n)->tval & REC)
#define isfld(n)	((n)->tval & FLD)
#define isstr(n)	((n)->tval & STR)
#define isnum(n)	((n)->tval & NUM)
#define isarr(n)	((n)->tval & ARR)
#define isfcn(n)	((n)->tval & FCN)

/* test on the nobj member */
#define	isargument(n)	((n)->nobj == ARG)

/*
 * freeable(p): true only when STR is set and DONTFREE is clear.
 * An earlier definition, kept here for reference, tested DONTFREE alone:
 *	#define freeable(p)	(!((p)->tval & DONTFREE))
 */
#define freeable(p)	( ((p)->tval & (STR|DONTFREE)) == STR )
2396ab05f83Stholo 
/* structures used by regular expression matching machinery, mostly b.c: */

/*
 * NCHARS: 256 handles 8-bit chars; 128 does 7-bit.
 * BUG: some overflows (caught) if we use 256;
 * watch out in match(), etc.
 * NOTE(review): the remarks above mention 256/128 while the value below
 * is 1256+3 -- confirm whether they are still accurate.
 */
#define	NCHARS	(1256+3)
/* matches ^ in regular expr */
#define	HAT	(NCHARS+2)
#define	NSTATES	32
2476ab05f83Stholo 
2486ab05f83Stholo typedef struct rrow {
249271018d0Smillert 	long	ltype;	/* long avoids pointer warnings on 64-bit */
2506ab05f83Stholo 	union {
2516ab05f83Stholo 		int i;
2526ab05f83Stholo 		Node *np;
253a27f5228Smillert 		uschar *up;
254a886e62eSmillert 		int *rp; /* rune representation of char class */
2556ab05f83Stholo 	} lval;		/* because Al stores a pointer in it! */
2560a80cfdaSmillert 	int	*lfollow;
2576ab05f83Stholo } rrow;
2586ab05f83Stholo 
/* gtte: gototab entry */
typedef struct gtte gtte;

struct gtte {
	unsigned int ch;
	unsigned int state;
};

/* gtt: gototab */
typedef struct gtt gtt;

struct gtt {
	size_t	allocated;
	size_t	inuse;
	gtte	*entries;
};
269a886e62eSmillert 
2706ab05f83Stholo typedef struct fa {
271c83d5272Smillert 	gtt	*gototab;
27277a7feafSmillert 	uschar	*out;
273a27f5228Smillert 	uschar	*restr;
27477a7feafSmillert 	int	**posns;
27577a7feafSmillert 	int	state_count;
276f81b289fSmillert 	bool	anchor;
2776ab05f83Stholo 	int	use;
2786ab05f83Stholo 	int	initstat;
2796ab05f83Stholo 	int	curstat;
2806ab05f83Stholo 	int	accept;
281271018d0Smillert 	struct	rrow re[1];	/* variable: actual size set by calling malloc */
2826ab05f83Stholo } fa;
2836ab05f83Stholo 
2846ab05f83Stholo 
2856ab05f83Stholo #include "proto.h"
286