1*74a4d8c2SCharles.Forsyth /****************************************************************
2*74a4d8c2SCharles.Forsyth Copyright (C) Lucent Technologies 1997
3*74a4d8c2SCharles.Forsyth All Rights Reserved
4*74a4d8c2SCharles.Forsyth
5*74a4d8c2SCharles.Forsyth Permission to use, copy, modify, and distribute this software and
6*74a4d8c2SCharles.Forsyth its documentation for any purpose and without fee is hereby
7*74a4d8c2SCharles.Forsyth granted, provided that the above copyright notice appear in all
8*74a4d8c2SCharles.Forsyth copies and that both that the copyright notice and this
9*74a4d8c2SCharles.Forsyth permission notice and warranty disclaimer appear in supporting
10*74a4d8c2SCharles.Forsyth documentation, and that the name Lucent Technologies or any of
11*74a4d8c2SCharles.Forsyth its entities not be used in advertising or publicity pertaining
12*74a4d8c2SCharles.Forsyth to distribution of the software without specific, written prior
13*74a4d8c2SCharles.Forsyth permission.
14*74a4d8c2SCharles.Forsyth
15*74a4d8c2SCharles.Forsyth LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16*74a4d8c2SCharles.Forsyth INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17*74a4d8c2SCharles.Forsyth IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18*74a4d8c2SCharles.Forsyth SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19*74a4d8c2SCharles.Forsyth WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20*74a4d8c2SCharles.Forsyth IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21*74a4d8c2SCharles.Forsyth ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22*74a4d8c2SCharles.Forsyth THIS SOFTWARE.
23*74a4d8c2SCharles.Forsyth ****************************************************************/
24*74a4d8c2SCharles.Forsyth
25*74a4d8c2SCharles.Forsyth #include <stdio.h>
26*74a4d8c2SCharles.Forsyth #include <stdlib.h>
27*74a4d8c2SCharles.Forsyth #include <string.h>
28*74a4d8c2SCharles.Forsyth #include <ctype.h>
29*74a4d8c2SCharles.Forsyth #include "awk.h"
30*74a4d8c2SCharles.Forsyth #include "ytab.h"
31*74a4d8c2SCharles.Forsyth
32*74a4d8c2SCharles.Forsyth extern YYSTYPE yylval;
33*74a4d8c2SCharles.Forsyth extern int infunc;
34*74a4d8c2SCharles.Forsyth
35*74a4d8c2SCharles.Forsyth int lineno = 1;
36*74a4d8c2SCharles.Forsyth int bracecnt = 0;
37*74a4d8c2SCharles.Forsyth int brackcnt = 0;
38*74a4d8c2SCharles.Forsyth int parencnt = 0;
39*74a4d8c2SCharles.Forsyth
/* One row of the reserved-word table searched by word(). */
typedef struct Keyword {
	char	*word;		/* spelling of the reserved word */
	int	sub;		/* stored in yylval.i when the word is recognised */
	int	type;		/* token code returned to the parser for the word */
} Keyword;
45*74a4d8c2SCharles.Forsyth
46*74a4d8c2SCharles.Forsyth Keyword keywords[] ={ /* keep sorted: binary searched */
47*74a4d8c2SCharles.Forsyth { "BEGIN", XBEGIN, XBEGIN },
48*74a4d8c2SCharles.Forsyth { "END", XEND, XEND },
49*74a4d8c2SCharles.Forsyth { "NF", VARNF, VARNF },
50*74a4d8c2SCharles.Forsyth { "atan2", FATAN, BLTIN },
51*74a4d8c2SCharles.Forsyth { "break", BREAK, BREAK },
52*74a4d8c2SCharles.Forsyth { "close", CLOSE, CLOSE },
53*74a4d8c2SCharles.Forsyth { "continue", CONTINUE, CONTINUE },
54*74a4d8c2SCharles.Forsyth { "cos", FCOS, BLTIN },
55*74a4d8c2SCharles.Forsyth { "delete", DELETE, DELETE },
56*74a4d8c2SCharles.Forsyth { "do", DO, DO },
57*74a4d8c2SCharles.Forsyth { "else", ELSE, ELSE },
58*74a4d8c2SCharles.Forsyth { "exit", EXIT, EXIT },
59*74a4d8c2SCharles.Forsyth { "exp", FEXP, BLTIN },
60*74a4d8c2SCharles.Forsyth { "fflush", FFLUSH, BLTIN },
61*74a4d8c2SCharles.Forsyth { "for", FOR, FOR },
62*74a4d8c2SCharles.Forsyth { "func", FUNC, FUNC },
63*74a4d8c2SCharles.Forsyth { "function", FUNC, FUNC },
64*74a4d8c2SCharles.Forsyth { "getline", GETLINE, GETLINE },
65*74a4d8c2SCharles.Forsyth { "gsub", GSUB, GSUB },
66*74a4d8c2SCharles.Forsyth { "if", IF, IF },
67*74a4d8c2SCharles.Forsyth { "in", IN, IN },
68*74a4d8c2SCharles.Forsyth { "index", INDEX, INDEX },
69*74a4d8c2SCharles.Forsyth { "int", FINT, BLTIN },
70*74a4d8c2SCharles.Forsyth { "length", FLENGTH, BLTIN },
71*74a4d8c2SCharles.Forsyth { "log", FLOG, BLTIN },
72*74a4d8c2SCharles.Forsyth { "match", MATCHFCN, MATCHFCN },
73*74a4d8c2SCharles.Forsyth { "next", NEXT, NEXT },
74*74a4d8c2SCharles.Forsyth { "nextfile", NEXTFILE, NEXTFILE },
75*74a4d8c2SCharles.Forsyth { "print", PRINT, PRINT },
76*74a4d8c2SCharles.Forsyth { "printf", PRINTF, PRINTF },
77*74a4d8c2SCharles.Forsyth { "rand", FRAND, BLTIN },
78*74a4d8c2SCharles.Forsyth { "return", RETURN, RETURN },
79*74a4d8c2SCharles.Forsyth { "sin", FSIN, BLTIN },
80*74a4d8c2SCharles.Forsyth { "split", SPLIT, SPLIT },
81*74a4d8c2SCharles.Forsyth { "sprintf", SPRINTF, SPRINTF },
82*74a4d8c2SCharles.Forsyth { "sqrt", FSQRT, BLTIN },
83*74a4d8c2SCharles.Forsyth { "srand", FSRAND, BLTIN },
84*74a4d8c2SCharles.Forsyth { "sub", SUB, SUB },
85*74a4d8c2SCharles.Forsyth { "substr", SUBSTR, SUBSTR },
86*74a4d8c2SCharles.Forsyth { "system", FSYSTEM, BLTIN },
87*74a4d8c2SCharles.Forsyth { "tolower", FTOLOWER, BLTIN },
88*74a4d8c2SCharles.Forsyth { "toupper", FTOUPPER, BLTIN },
89*74a4d8c2SCharles.Forsyth { "while", WHILE, WHILE },
90*74a4d8c2SCharles.Forsyth };
91*74a4d8c2SCharles.Forsyth
/*
 * RET(x): return token x from the enclosing lexer function.
 *
 * With DEBUG defined, the token's name (via tokname()) is printed first
 * whenever dbg is nonzero.  The debug form is wrapped in do { } while (0)
 * so that "RET(x);" is always exactly one statement, including in an
 * unbraced if/else.  Note that x is evaluated twice in the debug form,
 * so it must not have side effects.
 */
#define DEBUG
#ifdef DEBUG
#define	RET(x)	do { if (dbg) printf("lex %s\n", tokname(x)); return (x); } while (0)
#else
#define	RET(x)	return (x)
#endif
98*74a4d8c2SCharles.Forsyth
/*
 * peek: report the next input character without consuming it.
 * The character is read with input() and immediately pushed back
 * with unput().
 */
int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
105*74a4d8c2SCharles.Forsyth
/*
 * gettok: get next input token.
 *
 * pbuf/psz point at the caller's token buffer and its size; the buffer may
 * be enlarged through adjbuf(), in which case *pbuf and *psz are updated
 * before the final return.  The buffer must hold at least two bytes.
 *
 * Returns 0 at end of input.  For a character that cannot start a name or
 * a number, returns that character (also left in buf as a one-character
 * string).  Otherwise collects a name ([A-Za-z_][A-Za-z0-9_]*) or the
 * longest prefix that strtod() accepts as a number into buf and returns
 * buf[0].
 *
 * NOTE(review): if strtod() accepts nothing (e.g. a "." not followed by a
 * digit), the whole text is pushed back, buf becomes empty and 0 is
 * returned, which callers treat as end of input.
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	/* <ctype.h> functions need a value representable as unsigned char */
	if (!isalnum((unsigned char)c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha((unsigned char)c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/* keep room for this character and the terminating NUL */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum((unsigned char)c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
	} else {	/* it's a number */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* keep room for this character and the terminating NUL */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit((unsigned char)c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		unputstr(rem);		/* put rest back for later */
		rem[0] = 0;		/* truncate buf after the numeric part */
	}
	*pbuf = buf;
	*psz = sz;
	return buf[0];
}
160*74a4d8c2SCharles.Forsyth
/* Helpers used by yylex() and defined later in this file. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot requests honoured at the start of the next yylex() call. */
int	sc = 0;		/* 1 => return a } right now */
int	reg = 0;	/* 1 => return a REGEXPR now */
166*74a4d8c2SCharles.Forsyth
yylex(void)167*74a4d8c2SCharles.Forsyth int yylex(void)
168*74a4d8c2SCharles.Forsyth {
169*74a4d8c2SCharles.Forsyth int c;
170*74a4d8c2SCharles.Forsyth static char *buf = 0;
171*74a4d8c2SCharles.Forsyth static int bufsize = 500;
172*74a4d8c2SCharles.Forsyth
173*74a4d8c2SCharles.Forsyth if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
174*74a4d8c2SCharles.Forsyth FATAL( "out of space in yylex" );
175*74a4d8c2SCharles.Forsyth if (sc) {
176*74a4d8c2SCharles.Forsyth sc = 0;
177*74a4d8c2SCharles.Forsyth RET('}');
178*74a4d8c2SCharles.Forsyth }
179*74a4d8c2SCharles.Forsyth if (reg) {
180*74a4d8c2SCharles.Forsyth reg = 0;
181*74a4d8c2SCharles.Forsyth return regexpr();
182*74a4d8c2SCharles.Forsyth }
183*74a4d8c2SCharles.Forsyth for (;;) {
184*74a4d8c2SCharles.Forsyth c = gettok(&buf, &bufsize);
185*74a4d8c2SCharles.Forsyth if (c == 0)
186*74a4d8c2SCharles.Forsyth return 0;
187*74a4d8c2SCharles.Forsyth if (isalpha(c) || c == '_')
188*74a4d8c2SCharles.Forsyth return word(buf);
189*74a4d8c2SCharles.Forsyth if (isdigit(c) || c == '.') {
190*74a4d8c2SCharles.Forsyth yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
191*74a4d8c2SCharles.Forsyth /* should this also have STR set? */
192*74a4d8c2SCharles.Forsyth RET(NUMBER);
193*74a4d8c2SCharles.Forsyth }
194*74a4d8c2SCharles.Forsyth
195*74a4d8c2SCharles.Forsyth yylval.i = c;
196*74a4d8c2SCharles.Forsyth switch (c) {
197*74a4d8c2SCharles.Forsyth case '\n': /* {EOL} */
198*74a4d8c2SCharles.Forsyth RET(NL);
199*74a4d8c2SCharles.Forsyth case '\r': /* assume \n is coming */
200*74a4d8c2SCharles.Forsyth case ' ': /* {WS}+ */
201*74a4d8c2SCharles.Forsyth case '\t':
202*74a4d8c2SCharles.Forsyth break;
203*74a4d8c2SCharles.Forsyth case '#': /* #.* strip comments */
204*74a4d8c2SCharles.Forsyth while ((c = input()) != '\n' && c != 0)
205*74a4d8c2SCharles.Forsyth ;
206*74a4d8c2SCharles.Forsyth unput(c);
207*74a4d8c2SCharles.Forsyth break;
208*74a4d8c2SCharles.Forsyth case ';':
209*74a4d8c2SCharles.Forsyth RET(';');
210*74a4d8c2SCharles.Forsyth case '\\':
211*74a4d8c2SCharles.Forsyth if (peek() == '\n') {
212*74a4d8c2SCharles.Forsyth input();
213*74a4d8c2SCharles.Forsyth } else if (peek() == '\r') {
214*74a4d8c2SCharles.Forsyth input(); input(); /* \n */
215*74a4d8c2SCharles.Forsyth lineno++;
216*74a4d8c2SCharles.Forsyth } else {
217*74a4d8c2SCharles.Forsyth RET(c);
218*74a4d8c2SCharles.Forsyth }
219*74a4d8c2SCharles.Forsyth break;
220*74a4d8c2SCharles.Forsyth case '&':
221*74a4d8c2SCharles.Forsyth if (peek() == '&') {
222*74a4d8c2SCharles.Forsyth input(); RET(AND);
223*74a4d8c2SCharles.Forsyth } else
224*74a4d8c2SCharles.Forsyth RET('&');
225*74a4d8c2SCharles.Forsyth case '|':
226*74a4d8c2SCharles.Forsyth if (peek() == '|') {
227*74a4d8c2SCharles.Forsyth input(); RET(BOR);
228*74a4d8c2SCharles.Forsyth } else
229*74a4d8c2SCharles.Forsyth RET('|');
230*74a4d8c2SCharles.Forsyth case '!':
231*74a4d8c2SCharles.Forsyth if (peek() == '=') {
232*74a4d8c2SCharles.Forsyth input(); yylval.i = NE; RET(NE);
233*74a4d8c2SCharles.Forsyth } else if (peek() == '~') {
234*74a4d8c2SCharles.Forsyth input(); yylval.i = NOTMATCH; RET(MATCHOP);
235*74a4d8c2SCharles.Forsyth } else
236*74a4d8c2SCharles.Forsyth RET(NOT);
237*74a4d8c2SCharles.Forsyth case '~':
238*74a4d8c2SCharles.Forsyth yylval.i = MATCH;
239*74a4d8c2SCharles.Forsyth RET(MATCHOP);
240*74a4d8c2SCharles.Forsyth case '<':
241*74a4d8c2SCharles.Forsyth if (peek() == '=') {
242*74a4d8c2SCharles.Forsyth input(); yylval.i = LE; RET(LE);
243*74a4d8c2SCharles.Forsyth } else {
244*74a4d8c2SCharles.Forsyth yylval.i = LT; RET(LT);
245*74a4d8c2SCharles.Forsyth }
246*74a4d8c2SCharles.Forsyth case '=':
247*74a4d8c2SCharles.Forsyth if (peek() == '=') {
248*74a4d8c2SCharles.Forsyth input(); yylval.i = EQ; RET(EQ);
249*74a4d8c2SCharles.Forsyth } else {
250*74a4d8c2SCharles.Forsyth yylval.i = ASSIGN; RET(ASGNOP);
251*74a4d8c2SCharles.Forsyth }
252*74a4d8c2SCharles.Forsyth case '>':
253*74a4d8c2SCharles.Forsyth if (peek() == '=') {
254*74a4d8c2SCharles.Forsyth input(); yylval.i = GE; RET(GE);
255*74a4d8c2SCharles.Forsyth } else if (peek() == '>') {
256*74a4d8c2SCharles.Forsyth input(); yylval.i = APPEND; RET(APPEND);
257*74a4d8c2SCharles.Forsyth } else {
258*74a4d8c2SCharles.Forsyth yylval.i = GT; RET(GT);
259*74a4d8c2SCharles.Forsyth }
260*74a4d8c2SCharles.Forsyth case '+':
261*74a4d8c2SCharles.Forsyth if (peek() == '+') {
262*74a4d8c2SCharles.Forsyth input(); yylval.i = INCR; RET(INCR);
263*74a4d8c2SCharles.Forsyth } else if (peek() == '=') {
264*74a4d8c2SCharles.Forsyth input(); yylval.i = ADDEQ; RET(ASGNOP);
265*74a4d8c2SCharles.Forsyth } else
266*74a4d8c2SCharles.Forsyth RET('+');
267*74a4d8c2SCharles.Forsyth case '-':
268*74a4d8c2SCharles.Forsyth if (peek() == '-') {
269*74a4d8c2SCharles.Forsyth input(); yylval.i = DECR; RET(DECR);
270*74a4d8c2SCharles.Forsyth } else if (peek() == '=') {
271*74a4d8c2SCharles.Forsyth input(); yylval.i = SUBEQ; RET(ASGNOP);
272*74a4d8c2SCharles.Forsyth } else
273*74a4d8c2SCharles.Forsyth RET('-');
274*74a4d8c2SCharles.Forsyth case '*':
275*74a4d8c2SCharles.Forsyth if (peek() == '=') { /* *= */
276*74a4d8c2SCharles.Forsyth input(); yylval.i = MULTEQ; RET(ASGNOP);
277*74a4d8c2SCharles.Forsyth } else if (peek() == '*') { /* ** or **= */
278*74a4d8c2SCharles.Forsyth input(); /* eat 2nd * */
279*74a4d8c2SCharles.Forsyth if (peek() == '=') {
280*74a4d8c2SCharles.Forsyth input(); yylval.i = POWEQ; RET(ASGNOP);
281*74a4d8c2SCharles.Forsyth } else {
282*74a4d8c2SCharles.Forsyth RET(POWER);
283*74a4d8c2SCharles.Forsyth }
284*74a4d8c2SCharles.Forsyth } else
285*74a4d8c2SCharles.Forsyth RET('*');
286*74a4d8c2SCharles.Forsyth case '/':
287*74a4d8c2SCharles.Forsyth RET('/');
288*74a4d8c2SCharles.Forsyth case '%':
289*74a4d8c2SCharles.Forsyth if (peek() == '=') {
290*74a4d8c2SCharles.Forsyth input(); yylval.i = MODEQ; RET(ASGNOP);
291*74a4d8c2SCharles.Forsyth } else
292*74a4d8c2SCharles.Forsyth RET('%');
293*74a4d8c2SCharles.Forsyth case '^':
294*74a4d8c2SCharles.Forsyth if (peek() == '=') {
295*74a4d8c2SCharles.Forsyth input(); yylval.i = POWEQ; RET(ASGNOP);
296*74a4d8c2SCharles.Forsyth } else
297*74a4d8c2SCharles.Forsyth RET(POWER);
298*74a4d8c2SCharles.Forsyth
299*74a4d8c2SCharles.Forsyth case '$':
300*74a4d8c2SCharles.Forsyth /* BUG: awkward, if not wrong */
301*74a4d8c2SCharles.Forsyth c = gettok(&buf, &bufsize);
302*74a4d8c2SCharles.Forsyth if (isalpha(c)) {
303*74a4d8c2SCharles.Forsyth if (strcmp(buf, "NF") == 0) { /* very special */
304*74a4d8c2SCharles.Forsyth unputstr("(NF)");
305*74a4d8c2SCharles.Forsyth RET(INDIRECT);
306*74a4d8c2SCharles.Forsyth }
307*74a4d8c2SCharles.Forsyth c = peek();
308*74a4d8c2SCharles.Forsyth if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
309*74a4d8c2SCharles.Forsyth unputstr(buf);
310*74a4d8c2SCharles.Forsyth RET(INDIRECT);
311*74a4d8c2SCharles.Forsyth }
312*74a4d8c2SCharles.Forsyth yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
313*74a4d8c2SCharles.Forsyth RET(IVAR);
314*74a4d8c2SCharles.Forsyth } else {
315*74a4d8c2SCharles.Forsyth unputstr(buf);
316*74a4d8c2SCharles.Forsyth RET(INDIRECT);
317*74a4d8c2SCharles.Forsyth }
318*74a4d8c2SCharles.Forsyth
319*74a4d8c2SCharles.Forsyth case '}':
320*74a4d8c2SCharles.Forsyth if (--bracecnt < 0)
321*74a4d8c2SCharles.Forsyth SYNTAX( "extra }" );
322*74a4d8c2SCharles.Forsyth sc = 1;
323*74a4d8c2SCharles.Forsyth RET(';');
324*74a4d8c2SCharles.Forsyth case ']':
325*74a4d8c2SCharles.Forsyth if (--brackcnt < 0)
326*74a4d8c2SCharles.Forsyth SYNTAX( "extra ]" );
327*74a4d8c2SCharles.Forsyth RET(']');
328*74a4d8c2SCharles.Forsyth case ')':
329*74a4d8c2SCharles.Forsyth if (--parencnt < 0)
330*74a4d8c2SCharles.Forsyth SYNTAX( "extra )" );
331*74a4d8c2SCharles.Forsyth RET(')');
332*74a4d8c2SCharles.Forsyth case '{':
333*74a4d8c2SCharles.Forsyth bracecnt++;
334*74a4d8c2SCharles.Forsyth RET('{');
335*74a4d8c2SCharles.Forsyth case '[':
336*74a4d8c2SCharles.Forsyth brackcnt++;
337*74a4d8c2SCharles.Forsyth RET('[');
338*74a4d8c2SCharles.Forsyth case '(':
339*74a4d8c2SCharles.Forsyth parencnt++;
340*74a4d8c2SCharles.Forsyth RET('(');
341*74a4d8c2SCharles.Forsyth
342*74a4d8c2SCharles.Forsyth case '"':
343*74a4d8c2SCharles.Forsyth return string(); /* BUG: should be like tran.c ? */
344*74a4d8c2SCharles.Forsyth
345*74a4d8c2SCharles.Forsyth default:
346*74a4d8c2SCharles.Forsyth RET(c);
347*74a4d8c2SCharles.Forsyth }
348*74a4d8c2SCharles.Forsyth }
349*74a4d8c2SCharles.Forsyth }
350*74a4d8c2SCharles.Forsyth
string(void)351*74a4d8c2SCharles.Forsyth int string(void)
352*74a4d8c2SCharles.Forsyth {
353*74a4d8c2SCharles.Forsyth int c, n;
354*74a4d8c2SCharles.Forsyth char *s, *bp;
355*74a4d8c2SCharles.Forsyth static char *buf = 0;
356*74a4d8c2SCharles.Forsyth static int bufsz = 500;
357*74a4d8c2SCharles.Forsyth
358*74a4d8c2SCharles.Forsyth if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
359*74a4d8c2SCharles.Forsyth FATAL("out of space for strings");
360*74a4d8c2SCharles.Forsyth for (bp = buf; (c = input()) != '"'; ) {
361*74a4d8c2SCharles.Forsyth if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
362*74a4d8c2SCharles.Forsyth FATAL("out of space for string %.10s...", buf);
363*74a4d8c2SCharles.Forsyth switch (c) {
364*74a4d8c2SCharles.Forsyth case '\n':
365*74a4d8c2SCharles.Forsyth case '\r':
366*74a4d8c2SCharles.Forsyth case 0:
367*74a4d8c2SCharles.Forsyth SYNTAX( "non-terminated string %.10s...", buf );
368*74a4d8c2SCharles.Forsyth lineno++;
369*74a4d8c2SCharles.Forsyth break;
370*74a4d8c2SCharles.Forsyth case '\\':
371*74a4d8c2SCharles.Forsyth c = input();
372*74a4d8c2SCharles.Forsyth switch (c) {
373*74a4d8c2SCharles.Forsyth case '"': *bp++ = '"'; break;
374*74a4d8c2SCharles.Forsyth case 'n': *bp++ = '\n'; break;
375*74a4d8c2SCharles.Forsyth case 't': *bp++ = '\t'; break;
376*74a4d8c2SCharles.Forsyth case 'f': *bp++ = '\f'; break;
377*74a4d8c2SCharles.Forsyth case 'r': *bp++ = '\r'; break;
378*74a4d8c2SCharles.Forsyth case 'b': *bp++ = '\b'; break;
379*74a4d8c2SCharles.Forsyth case 'v': *bp++ = '\v'; break;
380*74a4d8c2SCharles.Forsyth case 'a': *bp++ = '\007'; break;
381*74a4d8c2SCharles.Forsyth case '\\': *bp++ = '\\'; break;
382*74a4d8c2SCharles.Forsyth
383*74a4d8c2SCharles.Forsyth case '0': case '1': case '2': /* octal: \d \dd \ddd */
384*74a4d8c2SCharles.Forsyth case '3': case '4': case '5': case '6': case '7':
385*74a4d8c2SCharles.Forsyth n = c - '0';
386*74a4d8c2SCharles.Forsyth if ((c = peek()) >= '0' && c < '8') {
387*74a4d8c2SCharles.Forsyth n = 8 * n + input() - '0';
388*74a4d8c2SCharles.Forsyth if ((c = peek()) >= '0' && c < '8')
389*74a4d8c2SCharles.Forsyth n = 8 * n + input() - '0';
390*74a4d8c2SCharles.Forsyth }
391*74a4d8c2SCharles.Forsyth *bp++ = n;
392*74a4d8c2SCharles.Forsyth break;
393*74a4d8c2SCharles.Forsyth
394*74a4d8c2SCharles.Forsyth case 'x': /* hex \x0-9a-fA-F + */
395*74a4d8c2SCharles.Forsyth { char xbuf[100], *px;
396*74a4d8c2SCharles.Forsyth for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
397*74a4d8c2SCharles.Forsyth if (isdigit(c)
398*74a4d8c2SCharles.Forsyth || (c >= 'a' && c <= 'f')
399*74a4d8c2SCharles.Forsyth || (c >= 'A' && c <= 'F'))
400*74a4d8c2SCharles.Forsyth *px++ = c;
401*74a4d8c2SCharles.Forsyth else
402*74a4d8c2SCharles.Forsyth break;
403*74a4d8c2SCharles.Forsyth }
404*74a4d8c2SCharles.Forsyth *px = 0;
405*74a4d8c2SCharles.Forsyth unput(c);
406*74a4d8c2SCharles.Forsyth sscanf(xbuf, "%x", &n);
407*74a4d8c2SCharles.Forsyth *bp++ = n;
408*74a4d8c2SCharles.Forsyth break;
409*74a4d8c2SCharles.Forsyth }
410*74a4d8c2SCharles.Forsyth
411*74a4d8c2SCharles.Forsyth default:
412*74a4d8c2SCharles.Forsyth *bp++ = c;
413*74a4d8c2SCharles.Forsyth break;
414*74a4d8c2SCharles.Forsyth }
415*74a4d8c2SCharles.Forsyth break;
416*74a4d8c2SCharles.Forsyth default:
417*74a4d8c2SCharles.Forsyth *bp++ = c;
418*74a4d8c2SCharles.Forsyth break;
419*74a4d8c2SCharles.Forsyth }
420*74a4d8c2SCharles.Forsyth }
421*74a4d8c2SCharles.Forsyth *bp = 0;
422*74a4d8c2SCharles.Forsyth s = tostring(buf);
423*74a4d8c2SCharles.Forsyth *bp++ = ' '; *bp++ = 0;
424*74a4d8c2SCharles.Forsyth yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
425*74a4d8c2SCharles.Forsyth RET(STRING);
426*74a4d8c2SCharles.Forsyth }
427*74a4d8c2SCharles.Forsyth
428*74a4d8c2SCharles.Forsyth
/*
 * binsearch: look up w among the n entries of kp, which must be sorted
 * by word in strcmp() order.
 * Returns the index of the matching entry, or -1 if there is none.
 */
int binsearch(char *w, Keyword *kp, int n)
{
	int lo, hi, probe, cmp;

	for (lo = 0, hi = n - 1; lo <= hi; ) {
		probe = (lo + hi) / 2;
		cmp = strcmp(w, kp[probe].word);
		if (cmp == 0)
			return probe;
		if (cmp < 0)
			hi = probe - 1;
		else
			lo = probe + 1;
	}
	return -1;
}
446*74a4d8c2SCharles.Forsyth
word(char * w)447*74a4d8c2SCharles.Forsyth int word(char *w)
448*74a4d8c2SCharles.Forsyth {
449*74a4d8c2SCharles.Forsyth Keyword *kp;
450*74a4d8c2SCharles.Forsyth int c, n;
451*74a4d8c2SCharles.Forsyth
452*74a4d8c2SCharles.Forsyth n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
453*74a4d8c2SCharles.Forsyth kp = keywords + n;
454*74a4d8c2SCharles.Forsyth if (n != -1) { /* found in table */
455*74a4d8c2SCharles.Forsyth yylval.i = kp->sub;
456*74a4d8c2SCharles.Forsyth switch (kp->type) { /* special handling */
457*74a4d8c2SCharles.Forsyth case FSYSTEM:
458*74a4d8c2SCharles.Forsyth if (safe)
459*74a4d8c2SCharles.Forsyth SYNTAX( "system is unsafe" );
460*74a4d8c2SCharles.Forsyth RET(kp->type);
461*74a4d8c2SCharles.Forsyth case FUNC:
462*74a4d8c2SCharles.Forsyth if (infunc)
463*74a4d8c2SCharles.Forsyth SYNTAX( "illegal nested function" );
464*74a4d8c2SCharles.Forsyth RET(kp->type);
465*74a4d8c2SCharles.Forsyth case RETURN:
466*74a4d8c2SCharles.Forsyth if (!infunc)
467*74a4d8c2SCharles.Forsyth SYNTAX( "return not in function" );
468*74a4d8c2SCharles.Forsyth RET(kp->type);
469*74a4d8c2SCharles.Forsyth case VARNF:
470*74a4d8c2SCharles.Forsyth yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
471*74a4d8c2SCharles.Forsyth RET(VARNF);
472*74a4d8c2SCharles.Forsyth default:
473*74a4d8c2SCharles.Forsyth RET(kp->type);
474*74a4d8c2SCharles.Forsyth }
475*74a4d8c2SCharles.Forsyth }
476*74a4d8c2SCharles.Forsyth c = peek(); /* look for '(' */
477*74a4d8c2SCharles.Forsyth if (c != '(' && infunc && (n=isarg(w)) >= 0) {
478*74a4d8c2SCharles.Forsyth yylval.i = n;
479*74a4d8c2SCharles.Forsyth RET(ARG);
480*74a4d8c2SCharles.Forsyth } else {
481*74a4d8c2SCharles.Forsyth yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
482*74a4d8c2SCharles.Forsyth if (c == '(') {
483*74a4d8c2SCharles.Forsyth RET(CALL);
484*74a4d8c2SCharles.Forsyth } else {
485*74a4d8c2SCharles.Forsyth RET(VAR);
486*74a4d8c2SCharles.Forsyth }
487*74a4d8c2SCharles.Forsyth }
488*74a4d8c2SCharles.Forsyth }
489*74a4d8c2SCharles.Forsyth
startreg(void)490*74a4d8c2SCharles.Forsyth void startreg(void) /* next call to yyles will return a regular expression */
491*74a4d8c2SCharles.Forsyth {
492*74a4d8c2SCharles.Forsyth reg = 1;
493*74a4d8c2SCharles.Forsyth }
494*74a4d8c2SCharles.Forsyth
regexpr(void)495*74a4d8c2SCharles.Forsyth int regexpr(void)
496*74a4d8c2SCharles.Forsyth {
497*74a4d8c2SCharles.Forsyth int c;
498*74a4d8c2SCharles.Forsyth static char *buf = 0;
499*74a4d8c2SCharles.Forsyth static int bufsz = 500;
500*74a4d8c2SCharles.Forsyth char *bp;
501*74a4d8c2SCharles.Forsyth
502*74a4d8c2SCharles.Forsyth if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
503*74a4d8c2SCharles.Forsyth FATAL("out of space for rex expr");
504*74a4d8c2SCharles.Forsyth bp = buf;
505*74a4d8c2SCharles.Forsyth for ( ; (c = input()) != '/' && c != 0; ) {
506*74a4d8c2SCharles.Forsyth if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
507*74a4d8c2SCharles.Forsyth FATAL("out of space for reg expr %.10s...", buf);
508*74a4d8c2SCharles.Forsyth if (c == '\n') {
509*74a4d8c2SCharles.Forsyth SYNTAX( "newline in regular expression %.10s...", buf );
510*74a4d8c2SCharles.Forsyth unput('\n');
511*74a4d8c2SCharles.Forsyth break;
512*74a4d8c2SCharles.Forsyth } else if (c == '\\') {
513*74a4d8c2SCharles.Forsyth *bp++ = '\\';
514*74a4d8c2SCharles.Forsyth *bp++ = input();
515*74a4d8c2SCharles.Forsyth } else {
516*74a4d8c2SCharles.Forsyth *bp++ = c;
517*74a4d8c2SCharles.Forsyth }
518*74a4d8c2SCharles.Forsyth }
519*74a4d8c2SCharles.Forsyth *bp = 0;
520*74a4d8c2SCharles.Forsyth yylval.s = tostring(buf);
521*74a4d8c2SCharles.Forsyth unput('/');
522*74a4d8c2SCharles.Forsyth RET(REGEXPR);
523*74a4d8c2SCharles.Forsyth }
524*74a4d8c2SCharles.Forsyth
525*74a4d8c2SCharles.Forsyth /* low-level lexical stuff, sort of inherited from lex */
526*74a4d8c2SCharles.Forsyth
char	ebuf[300];		/* input() records each character it returns here, wrapping at the end */
char	*ep = ebuf;		/* next slot of ebuf to be written */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed-back character */
FILE	*yyin = 0;
532*74a4d8c2SCharles.Forsyth
input(void)533*74a4d8c2SCharles.Forsyth int input(void) /* get next lexical input character */
534*74a4d8c2SCharles.Forsyth {
535*74a4d8c2SCharles.Forsyth int c;
536*74a4d8c2SCharles.Forsyth extern char *lexprog;
537*74a4d8c2SCharles.Forsyth
538*74a4d8c2SCharles.Forsyth if (yysptr > yysbuf)
539*74a4d8c2SCharles.Forsyth c = *--yysptr;
540*74a4d8c2SCharles.Forsyth else if (lexprog != NULL) { /* awk '...' */
541*74a4d8c2SCharles.Forsyth if ((c = *lexprog) != 0)
542*74a4d8c2SCharles.Forsyth lexprog++;
543*74a4d8c2SCharles.Forsyth } else /* awk -f ... */
544*74a4d8c2SCharles.Forsyth c = pgetc();
545*74a4d8c2SCharles.Forsyth if (c == '\n')
546*74a4d8c2SCharles.Forsyth lineno++;
547*74a4d8c2SCharles.Forsyth else if (c == EOF)
548*74a4d8c2SCharles.Forsyth c = 0;
549*74a4d8c2SCharles.Forsyth if (ep >= ebuf + sizeof ebuf)
550*74a4d8c2SCharles.Forsyth ep = ebuf;
551*74a4d8c2SCharles.Forsyth return *ep++ = c;
552*74a4d8c2SCharles.Forsyth }
553*74a4d8c2SCharles.Forsyth
unput(int c)554*74a4d8c2SCharles.Forsyth void unput(int c) /* put lexical character back on input */
555*74a4d8c2SCharles.Forsyth {
556*74a4d8c2SCharles.Forsyth if (c == '\n')
557*74a4d8c2SCharles.Forsyth lineno--;
558*74a4d8c2SCharles.Forsyth if (yysptr >= yysbuf + sizeof(yysbuf))
559*74a4d8c2SCharles.Forsyth FATAL("pushed back too much: %.20s...", yysbuf);
560*74a4d8c2SCharles.Forsyth *yysptr++ = c;
561*74a4d8c2SCharles.Forsyth if (--ep < ebuf)
562*74a4d8c2SCharles.Forsyth ep = ebuf + sizeof(ebuf) - 1;
563*74a4d8c2SCharles.Forsyth }
564*74a4d8c2SCharles.Forsyth
/*
 * unputstr: put a string back on input.
 * The characters are pushed last-to-first so that subsequent input()
 * calls return them in their original order.
 */
void unputstr(char *s)
{
	char *p;

	for (p = s + strlen(s); p > s; )
		unput(*--p);
}
572