%Start A str sc reg comment

%{
/****************************************************************
Copyright (C) AT&T 1993
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name of AT&T or any of its entities
not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior
permission.

AT&T DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL AT&T OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "awk.h"
#include "y.tab.h"

extern YYSTYPE	yylval;
extern int	infunc;

int	lineno	= 1;	/* current source line, bumped on every newline seen */
int	bracecnt = 0;	/* currently open '{' */
int	brackcnt  = 0;	/* currently open '[' */
int	parencnt = 0;	/* currently open '(' */

/*
 * RET(x): return token x from yylex().  With DEBUG defined (it always is
 * here) the token name and matched text are printed first when dbg is set;
 * note that x is then evaluated twice.  Wrapped in do/while(0) so that
 * "RET(x);" is a single statement and is safe in an unbraced if/else.
 */
#define DEBUG
#ifdef	DEBUG
#	define	RET(x)	do { \
				if (dbg) \
					printf("lex %s [%s]\n", tokname(x), yytext); \
				return(x); \
			} while (0)
#else
#	define	RET(x)	return(x)
#endif

/*
 * CADD: append the first character of the current match (yytext[0]) to
 * cbuf.  When the buffer is about to fill up, report the error and drop
 * back to start condition A.  Also wrapped in do/while(0) so "CADD;" is a
 * single statement.
 */
#define	CADD	do { \
			cbuf[clen++] = yytext[0]; \
			if (clen >= CBUFLEN-1) { \
				ERROR "string/reg expr %.30s... too long", cbuf SYNTAX; \
				BEGIN A; \
			} \
		} while (0)

uchar	cbuf[CBUFLEN];	/* text of the string / regular expression being collected */
uchar	*s;
int	clen, cflag;	/* clen: number of characters currently in cbuf */

/* some of this depends on behavior of lex that
   may not be preserved in other implementations of lex.
*/

static int my_input( YY_CHAR *buf, int max_size );

/* route all scanner input through my_input() */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) result = my_input(buf, max_size);

#undef YY_USER_INIT
#define YY_USER_INIT init_input_source();
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]

%%
	/*
	 * Runs on every entry to yylex().  (yy_start - 1) / 2 is the
	 * expression flex's YY_START macro expands to, i.e. the current
	 * start condition.  Condition 0 (initial) is mapped to A.
	 * Condition sc is entered by the "}" rule, which returns ';'
	 * first; the '}' itself is delivered here on the following call.
	 */
	switch ((yy_start - 1) / 2) {	/* witchcraft */
	case 0:
		BEGIN A;
		break;
	case sc:
		BEGIN A;
		RET('}');
	}

<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }
<A>"^"		{ RET(POWER); }

<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }
<A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }
<A>"$"{A}{B}*	{
		  /*
		   * "$name".  The match is copied first because the
		   * input()/unput() lookahead below may overwrite yytext
		   * (see the flex manual on unput()).
		   * NOTE(review): if input() reports end of input, that
		   * value is pushed back as-is by unput(c) -- confirm this
		   * is harmless with the lex in use.
		   */
		  int c;
		  char *name = strdup(yytext);
		  if (name == NULL) {
			fprintf(stderr, "awk: out of memory in lexer\n");
			exit(EXIT_FAILURE);
		  }
		  c = input(); unput(c);	/* look for '(' or '[' */
		  if (c == '(' || c == '[' ||
		      (infunc && isarg(name+1) >= 0)) {
			/* $f(...), $a[...] or $arg: hand back '$' alone and
			   rescan the name as an ordinary identifier */
			unputstr(name+1);
			free(name);
			return(INDIRECT);
		  } else {
			yylval.cp = setsymtab(name+1,"",0.0,STR|NUM,symtab);
			free(name);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{
		  /* NF is entered in the symbol table as a numeric variable */
		  yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab);
		  RET(VARNF);
		}

<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  /* numeric constant: keep both its text and its value */
		  double numval = atof(yytext);
		  yylval.cp = setsymtab(yytext, tostring(yytext), numval,
			CON|NUM, symtab);
		  /* should this also have STR set? */
		  RET(NUMBER);
		}

	/* statement keywords */
<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }

	/* output and string functions that have their own token */
<A>print	{ yylval.i = PRINT;   RET(PRINT); }
<A>printf	{ yylval.i = PRINTF;  RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT;   RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB;     RET(SUB); }
<A>gsub		{ yylval.i = GSUB;    RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }

	/* built-ins sharing the BLTIN token; yylval.i says which one */
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }
<A>log		{ yylval.i = FLOG;    RET(BLTIN); }
<A>int		{ yylval.i = FINT;    RET(BLTIN); }
<A>exp		{ yylval.i = FEXP;    RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT;   RET(BLTIN); }
<A>sin		{ yylval.i = FSIN;    RET(BLTIN); }
<A>cos		{ yylval.i = FCOS;    RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN;   RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND;   RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND;  RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
<A>fflush	{ yylval.i = FFLUSH; RET(BLTIN); }

<A>{A}{B}*	{
		  /*
		   * Any other identifier.  The match is copied first
		   * because the input()/unput() lookahead below may
		   * overwrite yytext (see the flex manual on unput()).
		   * Result: ARG for a parameter of the function being
		   * defined (yylval.i = its index), CALL when the next
		   * character is '(', otherwise VAR.
		   */
		  int c, n = -1;
		  char *name = strdup(yytext);
		  if (name == NULL) {
			fprintf(stderr, "awk: out of memory in lexer\n");
			exit(EXIT_FAILURE);
		  }
		  c = input(); unput(c);	/* look for '(' */
		  if (c != '(' && infunc)
			n = isarg(name);
		  if (n >= 0) {
			yylval.i = n;
			free(name);
			RET(ARG);
		  } else {
			yylval.cp = setsymtab(name,"",0.0,STR|NUM,symtab);
			free(name);
			if (c == '(') {
				RET(CALL);
			} else {
				RET(VAR);
			}
		  }
		}

<A>\"		{ BEGIN str; clen = 0; }

<A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }

	/*
	 * Regular expression and string bodies are collected in cbuf.
	 * Every append goes through CADD, which is the only place the
	 * buffer limit is checked; rules that store something other than
	 * the matched character put it in yytext[0] first.
	 */
<reg>\\.	{ cbuf[clen++] = '\\'; yytext[0] = yytext[1]; CADD; }
<reg>\n		{ ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');	/* the closing '/' is rescanned in state A */
		  RET(REGEXPR); }
<reg>.		{ CADD; }

<str>\"		{ BEGIN A;
		  /* value is the collected text; the symbol-table name
		     is the same text with one trailing blank */
		  cbuf[clen] = 0; s = tostring(cbuf);
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ yytext[0] = '"'; CADD; }
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler.  hence 007 for bell */
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O})	{
		  /* octal escape; only the low byte is kept */
		  yytext[0] = (char) strtoul((char *) yytext + 1, NULL, 8);
		  CADD; }
<str>"\\"x({H}+)	{
		  /* ANSI permits any number of hex digits; strtoul
		     saturates on overflow, only the low byte is kept */
		  yytext[0] = (char) strtoul((char *) yytext + 2, NULL, 16);
		  CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }
<str>.		{ CADD; }

%%

void startreg(void)	/* start parsing a regular expression */
{
	BEGIN reg;
	clen = 0;
}

/*
 * Input hook installed through YY_INPUT: store up to max_size characters
 * of program text in buf and return how many were stored; 0 means end of
 * input.  max_size is assumed to be positive.
 */
static int my_input( YY_CHAR *buf, int max_size )
{
	extern uchar *lexprog;

	if ( lexprog ) {		/* awk '...' */
		size_t avail = strlen( (char *) lexprog );
		int num_chars;

		if ( avail > (size_t) max_size )
			num_chars = max_size;
		else
			num_chars = (int) avail;
		/* copy exactly num_chars bytes: no terminating NUL, so
		   nothing is written beyond buf[max_size - 1] */
		memcpy( buf, lexprog, (size_t) num_chars );
		lexprog += num_chars;
		return num_chars;
	} else {			/* awk -f ... */
		int c = pgetc();

		if (c == EOF)
			return 0;
		buf[0] = c;
		return 1;
	}
}

void unputstr(char *s)	/* put a string back on input */
{
	size_t i;

	/* push back last character first so the text is re-read in order */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}

/* Callable wrapper around the scanner's input(): return the next input character. */
int lex_input(void)
{
	return input();
}