%Start A str sc reg comment

%{
/****************************************************************
Copyright (C) AT&T 1993
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name of AT&T or any of its entities
not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/* some of this depends on behavior of lex that
   may not be preserved in other implementations of lex.
*/

#undef	input	/* defeat lex */
#undef	unput

#include <stdlib.h>
#include <string.h>
#include "awk.h"
#include "y.tab.h"

extern YYSTYPE	yylval;
extern int	infunc;

int	lineno	= 1;	/* bumped by every rule that consumes a newline */
int	bracecnt = 0;	/* '{' seen minus '}' seen; going negative is reported */
int	brackcnt  = 0;	/* likewise for '[' and ']' */
int	parencnt = 0;	/* likewise for '(' and ')' */

/*
 * RET(x): return token x from yylex().  When DEBUG is defined and dbg is
 * nonzero, the token name and the matched text are printed first.
 * Wrapped in do/while(0) so that "RET(x);" is a single statement.
 * Note that x is expanded twice in the DEBUG form.
 */
#define	DEBUG
#ifdef	DEBUG
#	define	RET(x)	do { if (dbg) printf("lex %s [%s]\n", tokname(x), yytext); return(x); } while (0)
#else
#	define	RET(x)	return(x)
#endif

/*
 * CADD: append the first matched character to cbuf.  Once only the last
 * slot of cbuf is left, the string/regexp is reported as too long and the
 * scanner goes back to start condition A.
 */
#define	CADD	do { \
			cbuf[clen++] = yytext[0]; \
			if (clen >= CBUFLEN-1) { \
				ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \
				BEGIN A; \
			} \
		} while (0)

uchar	cbuf[CBUFLEN];	/* text of the string or regular expression being collected */
uchar	*s;		/* scratch: copy of a finished string constant */
int	clen, cflag;	/* clen: number of bytes currently in cbuf */
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]

%%
	/*
	 * Runs on every call to yylex(), before any pattern is tried.
	 * NOTE(review): yybgin-yysvec-1 is presumably the current start
	 * condition as encoded by this lex's BEGIN -- confirm before porting.
	 * Case 0 (no start condition yet) enters A.  Case sc is entered by
	 * the "}" rule, which returns ';' first; the '}' itself is
	 * delivered here on the following call.
	 */
	switch (yybgin-yysvec-1) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}

<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$NF"	{ /* rescan as $(NF); plain return, so no RET trace line */
		  unputstr("(NF)"); return(INDIRECT); }
<A>"$"{A}{B}*	{ int c;
		  c = input(); unput(c);	/* peek at the next character */
		  if (c == '(' || c == '[' || (infunc && isarg(yytext+1) >= 0)) {
			/* call, subscript or function argument: hand back
			   '$' alone and rescan the name as an expression */
			unputstr(yytext+1);
			return(INDIRECT);
		  } else {
			yylval.cp = setsymtab(yytext+1,"",0.0,STR|NUM,symtab);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
		  /* should this also have STR set? */
		  RET(NUMBER); }

<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
<A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }

<A>{A}{B}*	{ int n, c;
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc && (n=isarg(yytext)) >= 0) {
			yylval.i = n;
			RET(ARG);
		  } else {
			yylval.cp = setsymtab(yytext,"",0.0,STR|NUM,symtab);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}
<A>\"		{ BEGIN str; clen = 0; }

<A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }

<reg>\\.	{ /* Keep the backslash and the escaped character.  Both
		     stores go through CADD so that a long run of escapes
		     cannot write past the end of cbuf; the second store
		     is skipped when the first one already hit the limit. */
		  CADD;
		  if (clen < CBUFLEN-1) { yytext[0] = yytext[1]; CADD; } }
<reg>\n		{ cbuf[clen] = 0;	/* terminate before %.10s reads it */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  /* the symbol-table name is the text plus one trailing
		     blank; NOTE(review): presumably so that it cannot
		     collide with an identifier -- confirm */
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate before %.10s reads it */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ /* Every escape below places its translated character in
		     yytext[0] and lets CADD store it, so that escapes get
		     the same overflow check as ordinary characters. */
		  yytext[0] = '"'; CADD; }
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{ unsigned int n;	/* %o stores through an unsigned int * */
		  sscanf(yytext+1, "%o", &n); yytext[0] = n; CADD; }
<str>"\\"x({H}+)	{ unsigned int n;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n); yytext[0] = n; CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }
<str>.		{ CADD; }

%%

void startreg(void)	/* start parsing a regular expression */
{
	BEGIN reg;
	clen = 0;
}

/* input() and unput() are transcriptions of the standard lex
   macros for input and output with additions for error message
   printing.  God help us all if someone changes how lex works.
*/

/*
 * ebuf is a circular record of the characters most recently handed out
 * by input(); ep points at the slot the next character will occupy.
 */
uchar	ebuf[300];
uchar	*ep = ebuf;

/*
 * Return the next input character, or 0 at end of input.
 * Characters come, in order of preference, from lex's pushback stack
 * (yysbuf/yysptr), from the string lexprog, or from pgetc().
 * Each character returned is also recorded in ebuf.
 */
int input(void)	/* get next lexical input character */
{
	register int c;
	extern uchar *lexprog;

	if (yysptr > yysbuf)
		c = U(*--yysptr);
	else if (lexprog != NULL) {	/* awk '...' */
		c = *lexprog;
		if (c != 0)		/* never step past the terminating NUL */
			lexprog++;
	} else				/* awk -f ... */
		c = pgetc();
	if (c == '\n')
		yylineno++;
	else if (c == EOF)
		c = 0;
	if (ep >= ebuf + sizeof ebuf)	/* wrap around */
		ep = ebuf;
	return *ep++ = c;
}

/*
 * Push c back so that the next input() returns it, and step ep back by
 * one slot so that ebuf stays in sync with what has been consumed.
 */
void unput(int c)	/* put lexical character back on input */
{
	yytchar = c;
	if (yytchar == '\n')
		yylineno--;
	*yysptr++ = yytchar;
	/* test before decrementing: a pointer below ebuf must never be formed */
	if (ep > ebuf)
		ep--;
	else
		ep = ebuf + sizeof(ebuf) - 1;
}


/*
 * Push the whole string s back.  Characters are pushed last-to-first so
 * that they are read again in their original order.
 */
void unputstr(char *s)	/* put a string back on input */
{
	int i;

	for (i = strlen(s)-1; i >= 0; i--)
		unput(s[i]);
}