12a55deb1SDavid E. O'Brien /**************************************************************** 22a55deb1SDavid E. O'Brien Copyright (C) Lucent Technologies 1997 32a55deb1SDavid E. O'Brien All Rights Reserved 42a55deb1SDavid E. O'Brien 52a55deb1SDavid E. O'Brien Permission to use, copy, modify, and distribute this software and 62a55deb1SDavid E. O'Brien its documentation for any purpose and without fee is hereby 72a55deb1SDavid E. O'Brien granted, provided that the above copyright notice appear in all 82a55deb1SDavid E. O'Brien copies and that both that the copyright notice and this 92a55deb1SDavid E. O'Brien permission notice and warranty disclaimer appear in supporting 102a55deb1SDavid E. O'Brien documentation, and that the name Lucent Technologies or any of 112a55deb1SDavid E. O'Brien its entities not be used in advertising or publicity pertaining 122a55deb1SDavid E. O'Brien to distribution of the software without specific, written prior 132a55deb1SDavid E. O'Brien permission. 142a55deb1SDavid E. O'Brien 152a55deb1SDavid E. O'Brien LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 162a55deb1SDavid E. O'Brien INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 172a55deb1SDavid E. O'Brien IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 182a55deb1SDavid E. O'Brien SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 192a55deb1SDavid E. O'Brien WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 202a55deb1SDavid E. O'Brien IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 212a55deb1SDavid E. O'Brien ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 222a55deb1SDavid E. O'Brien THIS SOFTWARE. 232a55deb1SDavid E. O'Brien ****************************************************************/ 242a55deb1SDavid E. O'Brien 252a55deb1SDavid E. O'Brien #include <stdio.h> 262a55deb1SDavid E. O'Brien #include <stdlib.h> 272a55deb1SDavid E. O'Brien #include <string.h> 282a55deb1SDavid E. O'Brien #include <ctype.h> 292a55deb1SDavid E. O'Brien #include "awk.h" 30f39dd6a9SWarner Losh #include "awkgram.tab.h" 312a55deb1SDavid E. O'Brien 322a55deb1SDavid E. O'Brien extern YYSTYPE yylval; 33f39dd6a9SWarner Losh extern bool infunc; 342a55deb1SDavid E. O'Brien 352a55deb1SDavid E. O'Brien int lineno = 1; 362a55deb1SDavid E. O'Brien int bracecnt = 0; 372a55deb1SDavid E. O'Brien int brackcnt = 0; 382a55deb1SDavid E. O'Brien int parencnt = 0; 392a55deb1SDavid E. O'Brien 402a55deb1SDavid E. O'Brien typedef struct Keyword { 41813da98dSDavid E. O'Brien const char *word; 422a55deb1SDavid E. O'Brien int sub; 432a55deb1SDavid E. O'Brien int type; 442a55deb1SDavid E. O'Brien } Keyword; 452a55deb1SDavid E. O'Brien 46f39dd6a9SWarner Losh const Keyword keywords[] = { /* keep sorted: binary searched */ 472a55deb1SDavid E. O'Brien { "BEGIN", XBEGIN, XBEGIN }, 482a55deb1SDavid E. O'Brien { "END", XEND, XEND }, 492a55deb1SDavid E. O'Brien { "NF", VARNF, VARNF }, 50eb690a05SWarner Losh { "and", FAND, BLTIN }, 512a55deb1SDavid E. O'Brien { "atan2", FATAN, BLTIN }, 522a55deb1SDavid E. O'Brien { "break", BREAK, BREAK }, 532a55deb1SDavid E. O'Brien { "close", CLOSE, CLOSE }, 54eb690a05SWarner Losh { "compl", FCOMPL, BLTIN }, 552a55deb1SDavid E. O'Brien { "continue", CONTINUE, CONTINUE }, 562a55deb1SDavid E. O'Brien { "cos", FCOS, BLTIN }, 572a55deb1SDavid E. O'Brien { "delete", DELETE, DELETE }, 582a55deb1SDavid E. O'Brien { "do", DO, DO }, 592a55deb1SDavid E. O'Brien { "else", ELSE, ELSE }, 602a55deb1SDavid E. O'Brien { "exit", EXIT, EXIT }, 612a55deb1SDavid E. O'Brien { "exp", FEXP, BLTIN }, 622a55deb1SDavid E. O'Brien { "fflush", FFLUSH, BLTIN }, 632a55deb1SDavid E. O'Brien { "for", FOR, FOR }, 642a55deb1SDavid E. O'Brien { "func", FUNC, FUNC }, 652a55deb1SDavid E. O'Brien { "function", FUNC, FUNC }, 66eb690a05SWarner Losh { "gensub", GENSUB, GENSUB }, 672a55deb1SDavid E. O'Brien { "getline", GETLINE, GETLINE }, 682a55deb1SDavid E. O'Brien { "gsub", GSUB, GSUB }, 692a55deb1SDavid E. O'Brien { "if", IF, IF }, 702a55deb1SDavid E. O'Brien { "in", IN, IN }, 712a55deb1SDavid E. O'Brien { "index", INDEX, INDEX }, 722a55deb1SDavid E. O'Brien { "int", FINT, BLTIN }, 732a55deb1SDavid E. O'Brien { "length", FLENGTH, BLTIN }, 742a55deb1SDavid E. O'Brien { "log", FLOG, BLTIN }, 75eb690a05SWarner Losh { "lshift", FLSHIFT, BLTIN }, 762a55deb1SDavid E. O'Brien { "match", MATCHFCN, MATCHFCN }, 77*8d457988SWarner Losh { "mktime", FMKTIME, BLTIN }, 782a55deb1SDavid E. O'Brien { "next", NEXT, NEXT }, 792a55deb1SDavid E. O'Brien { "nextfile", NEXTFILE, NEXTFILE }, 80eb690a05SWarner Losh { "or", FFOR, BLTIN }, 812a55deb1SDavid E. O'Brien { "print", PRINT, PRINT }, 822a55deb1SDavid E. O'Brien { "printf", PRINTF, PRINTF }, 832a55deb1SDavid E. O'Brien { "rand", FRAND, BLTIN }, 842a55deb1SDavid E. O'Brien { "return", RETURN, RETURN }, 85eb690a05SWarner Losh { "rshift", FRSHIFT, BLTIN }, 862a55deb1SDavid E. O'Brien { "sin", FSIN, BLTIN }, 872a55deb1SDavid E. O'Brien { "split", SPLIT, SPLIT }, 882a55deb1SDavid E. O'Brien { "sprintf", SPRINTF, SPRINTF }, 892a55deb1SDavid E. O'Brien { "sqrt", FSQRT, BLTIN }, 902a55deb1SDavid E. O'Brien { "srand", FSRAND, BLTIN }, 91eb690a05SWarner Losh { "strftime", FSTRFTIME, BLTIN }, 922a55deb1SDavid E. O'Brien { "sub", SUB, SUB }, 932a55deb1SDavid E. O'Brien { "substr", SUBSTR, SUBSTR }, 942a55deb1SDavid E. O'Brien { "system", FSYSTEM, BLTIN }, 95eb690a05SWarner Losh { "systime", FSYSTIME, BLTIN }, 962a55deb1SDavid E. O'Brien { "tolower", FTOLOWER, BLTIN }, 972a55deb1SDavid E. O'Brien { "toupper", FTOUPPER, BLTIN }, 982a55deb1SDavid E. O'Brien { "while", WHILE, WHILE }, 99eb690a05SWarner Losh { "xor", FXOR, BLTIN }, 1002a55deb1SDavid E. O'Brien }; 1012a55deb1SDavid E. O'Brien 1022a55deb1SDavid E. O'Brien #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } 1032a55deb1SDavid E. O'Brien 104f39dd6a9SWarner Losh static int peek(void) 1052a55deb1SDavid E. O'Brien { 1062a55deb1SDavid E. O'Brien int c = input(); 1072a55deb1SDavid E. O'Brien unput(c); 1082a55deb1SDavid E. O'Brien return c; 1092a55deb1SDavid E. O'Brien } 1102a55deb1SDavid E. O'Brien 111f39dd6a9SWarner Losh static int gettok(char **pbuf, int *psz) /* get next input token */ 1122a55deb1SDavid E. O'Brien { 113007c6572SDag-Erling Smørgrav int c, retc; 1142a55deb1SDavid E. O'Brien char *buf = *pbuf; 1152a55deb1SDavid E. O'Brien int sz = *psz; 1162a55deb1SDavid E. O'Brien char *bp = buf; 1172a55deb1SDavid E. O'Brien 1182a55deb1SDavid E. O'Brien c = input(); 1192a55deb1SDavid E. O'Brien if (c == 0) 1202a55deb1SDavid E. O'Brien return 0; 1212a55deb1SDavid E. O'Brien buf[0] = c; 1222a55deb1SDavid E. O'Brien buf[1] = 0; 1232a55deb1SDavid E. O'Brien if (!isalnum(c) && c != '.' && c != '_') 1242a55deb1SDavid E. O'Brien return c; 1252a55deb1SDavid E. O'Brien 1262a55deb1SDavid E. O'Brien *bp++ = c; 1272a55deb1SDavid E. O'Brien if (isalpha(c) || c == '_') { /* it's a varname */ 1282a55deb1SDavid E. O'Brien for ( ; (c = input()) != 0; ) { 1292a55deb1SDavid E. O'Brien if (bp-buf >= sz) 130addad6afSRong-En Fan if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 1312a55deb1SDavid E. O'Brien FATAL( "out of space for name %.10s...", buf ); 1322a55deb1SDavid E. O'Brien if (isalnum(c) || c == '_') 1332a55deb1SDavid E. O'Brien *bp++ = c; 1342a55deb1SDavid E. O'Brien else { 1352a55deb1SDavid E. O'Brien *bp = 0; 1362a55deb1SDavid E. O'Brien unput(c); 1372a55deb1SDavid E. O'Brien break; 1382a55deb1SDavid E. O'Brien } 1392a55deb1SDavid E. O'Brien } 1402a55deb1SDavid E. O'Brien *bp = 0; 141007c6572SDag-Erling Smørgrav retc = 'a'; /* alphanumeric */ 142c263f9bfSRuslan Ermilov } else { /* maybe it's a number, but could be . */ 1432a55deb1SDavid E. O'Brien char *rem; 1442a55deb1SDavid E. O'Brien /* read input until can't be a number */ 1452a55deb1SDavid E. O'Brien for ( ; (c = input()) != 0; ) { 1462a55deb1SDavid E. O'Brien if (bp-buf >= sz) 147addad6afSRong-En Fan if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 1482a55deb1SDavid E. O'Brien FATAL( "out of space for number %.10s...", buf ); 1492a55deb1SDavid E. O'Brien if (isdigit(c) || c == 'e' || c == 'E' 1502a55deb1SDavid E. O'Brien || c == '.' || c == '+' || c == '-') 1512a55deb1SDavid E. O'Brien *bp++ = c; 1522a55deb1SDavid E. O'Brien else { 1532a55deb1SDavid E. O'Brien unput(c); 1542a55deb1SDavid E. O'Brien break; 1552a55deb1SDavid E. O'Brien } 1562a55deb1SDavid E. O'Brien } 1572a55deb1SDavid E. O'Brien *bp = 0; 1582a55deb1SDavid E. O'Brien strtod(buf, &rem); /* parse the number */ 159007c6572SDag-Erling Smørgrav if (rem == buf) { /* it wasn't a valid number at all */ 160c263f9bfSRuslan Ermilov buf[1] = 0; /* return one character as token */ 161f39dd6a9SWarner Losh retc = (uschar)buf[0]; /* character is its own type */ 162c263f9bfSRuslan Ermilov unputstr(rem+1); /* put rest back for later */ 163007c6572SDag-Erling Smørgrav } else { /* some prefix was a number */ 164c263f9bfSRuslan Ermilov unputstr(rem); /* put rest back for later */ 165c263f9bfSRuslan Ermilov rem[0] = 0; /* truncate buf after number part */ 166c263f9bfSRuslan Ermilov retc = '0'; /* type is number */ 167007c6572SDag-Erling Smørgrav } 1682a55deb1SDavid E. O'Brien } 1692a55deb1SDavid E. O'Brien *pbuf = buf; 1702a55deb1SDavid E. O'Brien *psz = sz; 171007c6572SDag-Erling Smørgrav return retc; 1722a55deb1SDavid E. O'Brien } 1732a55deb1SDavid E. O'Brien 1742a55deb1SDavid E. O'Brien int word(char *); 1752a55deb1SDavid E. O'Brien int string(void); 1762a55deb1SDavid E. O'Brien int regexpr(void); 177f39dd6a9SWarner Losh bool sc = false; /* true => return a } right now */ 178f39dd6a9SWarner Losh bool reg = false; /* true => return a REGEXPR now */ 1792a55deb1SDavid E. O'Brien 1802a55deb1SDavid E. O'Brien int yylex(void) 1812a55deb1SDavid E. O'Brien { 1822a55deb1SDavid E. O'Brien int c; 18310ce5b99SWarner Losh static char *buf = NULL; 184addad6afSRong-En Fan static int bufsize = 5; /* BUG: setting this small causes core dump! */ 1852a55deb1SDavid E. O'Brien 18610ce5b99SWarner Losh if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL) 1872a55deb1SDavid E. O'Brien FATAL( "out of space in yylex" ); 1882a55deb1SDavid E. O'Brien if (sc) { 189f39dd6a9SWarner Losh sc = false; 1902a55deb1SDavid E. O'Brien RET('}'); 1912a55deb1SDavid E. O'Brien } 1922a55deb1SDavid E. O'Brien if (reg) { 193f39dd6a9SWarner Losh reg = false; 1942a55deb1SDavid E. O'Brien return regexpr(); 1952a55deb1SDavid E. O'Brien } 1962a55deb1SDavid E. O'Brien for (;;) { 1972a55deb1SDavid E. O'Brien c = gettok(&buf, &bufsize); 1982a55deb1SDavid E. O'Brien if (c == 0) 1992a55deb1SDavid E. O'Brien return 0; 2002a55deb1SDavid E. O'Brien if (isalpha(c) || c == '_') 2012a55deb1SDavid E. O'Brien return word(buf); 202007c6572SDag-Erling Smørgrav if (isdigit(c)) { 203f39dd6a9SWarner Losh char *cp = tostring(buf); 204f39dd6a9SWarner Losh double result; 205f39dd6a9SWarner Losh 206f39dd6a9SWarner Losh if (is_number(cp, & result)) 207f39dd6a9SWarner Losh yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab); 208f39dd6a9SWarner Losh else 209f39dd6a9SWarner Losh yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab); 210f39dd6a9SWarner Losh free(cp); 2112a55deb1SDavid E. O'Brien /* should this also have STR set? */ 2122a55deb1SDavid E. O'Brien RET(NUMBER); 2132a55deb1SDavid E. O'Brien } 2142a55deb1SDavid E. O'Brien 2152a55deb1SDavid E. O'Brien yylval.i = c; 2162a55deb1SDavid E. O'Brien switch (c) { 2172a55deb1SDavid E. O'Brien case '\n': /* {EOL} */ 218b5253557SWarner Losh lineno++; 2192a55deb1SDavid E. O'Brien RET(NL); 2202a55deb1SDavid E. O'Brien case '\r': /* assume \n is coming */ 2212a55deb1SDavid E. O'Brien case ' ': /* {WS}+ */ 2222a55deb1SDavid E. O'Brien case '\t': 2232a55deb1SDavid E. O'Brien break; 2242a55deb1SDavid E. O'Brien case '#': /* #.* strip comments */ 2252a55deb1SDavid E. O'Brien while ((c = input()) != '\n' && c != 0) 2262a55deb1SDavid E. O'Brien ; 2272a55deb1SDavid E. O'Brien unput(c); 228f39dd6a9SWarner Losh /* 229f39dd6a9SWarner Losh * Next line is a hack, it compensates for 230f39dd6a9SWarner Losh * unput's treatment of \n. 231f39dd6a9SWarner Losh */ 232f39dd6a9SWarner Losh lineno++; 2332a55deb1SDavid E. O'Brien break; 2342a55deb1SDavid E. O'Brien case ';': 2352a55deb1SDavid E. O'Brien RET(';'); 2362a55deb1SDavid E. O'Brien case '\\': 2372a55deb1SDavid E. O'Brien if (peek() == '\n') { 2382a55deb1SDavid E. O'Brien input(); 239b5253557SWarner Losh lineno++; 2402a55deb1SDavid E. O'Brien } else if (peek() == '\r') { 2412a55deb1SDavid E. O'Brien input(); input(); /* \n */ 2422a55deb1SDavid E. O'Brien lineno++; 2432a55deb1SDavid E. O'Brien } else { 2442a55deb1SDavid E. O'Brien RET(c); 2452a55deb1SDavid E. O'Brien } 2462a55deb1SDavid E. O'Brien break; 2472a55deb1SDavid E. O'Brien case '&': 2482a55deb1SDavid E. O'Brien if (peek() == '&') { 2492a55deb1SDavid E. O'Brien input(); RET(AND); 2502a55deb1SDavid E. O'Brien } else 2512a55deb1SDavid E. O'Brien RET('&'); 2522a55deb1SDavid E. O'Brien case '|': 2532a55deb1SDavid E. O'Brien if (peek() == '|') { 2542a55deb1SDavid E. O'Brien input(); RET(BOR); 2552a55deb1SDavid E. O'Brien } else 2562a55deb1SDavid E. O'Brien RET('|'); 2572a55deb1SDavid E. O'Brien case '!': 2582a55deb1SDavid E. O'Brien if (peek() == '=') { 2592a55deb1SDavid E. O'Brien input(); yylval.i = NE; RET(NE); 2602a55deb1SDavid E. O'Brien } else if (peek() == '~') { 2612a55deb1SDavid E. O'Brien input(); yylval.i = NOTMATCH; RET(MATCHOP); 2622a55deb1SDavid E. O'Brien } else 2632a55deb1SDavid E. O'Brien RET(NOT); 2642a55deb1SDavid E. O'Brien case '~': 2652a55deb1SDavid E. O'Brien yylval.i = MATCH; 2662a55deb1SDavid E. O'Brien RET(MATCHOP); 2672a55deb1SDavid E. O'Brien case '<': 2682a55deb1SDavid E. O'Brien if (peek() == '=') { 2692a55deb1SDavid E. O'Brien input(); yylval.i = LE; RET(LE); 2702a55deb1SDavid E. O'Brien } else { 2712a55deb1SDavid E. O'Brien yylval.i = LT; RET(LT); 2722a55deb1SDavid E. O'Brien } 2732a55deb1SDavid E. O'Brien case '=': 2742a55deb1SDavid E. O'Brien if (peek() == '=') { 2752a55deb1SDavid E. O'Brien input(); yylval.i = EQ; RET(EQ); 2762a55deb1SDavid E. O'Brien } else { 2772a55deb1SDavid E. O'Brien yylval.i = ASSIGN; RET(ASGNOP); 2782a55deb1SDavid E. O'Brien } 2792a55deb1SDavid E. O'Brien case '>': 2802a55deb1SDavid E. O'Brien if (peek() == '=') { 2812a55deb1SDavid E. O'Brien input(); yylval.i = GE; RET(GE); 2822a55deb1SDavid E. O'Brien } else if (peek() == '>') { 2832a55deb1SDavid E. O'Brien input(); yylval.i = APPEND; RET(APPEND); 2842a55deb1SDavid E. O'Brien } else { 2852a55deb1SDavid E. O'Brien yylval.i = GT; RET(GT); 2862a55deb1SDavid E. O'Brien } 2872a55deb1SDavid E. O'Brien case '+': 2882a55deb1SDavid E. O'Brien if (peek() == '+') { 2892a55deb1SDavid E. O'Brien input(); yylval.i = INCR; RET(INCR); 2902a55deb1SDavid E. O'Brien } else if (peek() == '=') { 2912a55deb1SDavid E. O'Brien input(); yylval.i = ADDEQ; RET(ASGNOP); 2922a55deb1SDavid E. O'Brien } else 2932a55deb1SDavid E. O'Brien RET('+'); 2942a55deb1SDavid E. O'Brien case '-': 2952a55deb1SDavid E. O'Brien if (peek() == '-') { 2962a55deb1SDavid E. O'Brien input(); yylval.i = DECR; RET(DECR); 2972a55deb1SDavid E. O'Brien } else if (peek() == '=') { 2982a55deb1SDavid E. O'Brien input(); yylval.i = SUBEQ; RET(ASGNOP); 2992a55deb1SDavid E. O'Brien } else 3002a55deb1SDavid E. O'Brien RET('-'); 3012a55deb1SDavid E. O'Brien case '*': 3022a55deb1SDavid E. O'Brien if (peek() == '=') { /* *= */ 3032a55deb1SDavid E. O'Brien input(); yylval.i = MULTEQ; RET(ASGNOP); 3042a55deb1SDavid E. O'Brien } else if (peek() == '*') { /* ** or **= */ 3052a55deb1SDavid E. O'Brien input(); /* eat 2nd * */ 3062a55deb1SDavid E. O'Brien if (peek() == '=') { 3072a55deb1SDavid E. O'Brien input(); yylval.i = POWEQ; RET(ASGNOP); 3082a55deb1SDavid E. O'Brien } else { 3092a55deb1SDavid E. O'Brien RET(POWER); 3102a55deb1SDavid E. O'Brien } 3112a55deb1SDavid E. O'Brien } else 3122a55deb1SDavid E. O'Brien RET('*'); 3132a55deb1SDavid E. O'Brien case '/': 3142a55deb1SDavid E. O'Brien RET('/'); 3152a55deb1SDavid E. O'Brien case '%': 3162a55deb1SDavid E. O'Brien if (peek() == '=') { 3172a55deb1SDavid E. O'Brien input(); yylval.i = MODEQ; RET(ASGNOP); 3182a55deb1SDavid E. O'Brien } else 3192a55deb1SDavid E. O'Brien RET('%'); 3202a55deb1SDavid E. O'Brien case '^': 3212a55deb1SDavid E. O'Brien if (peek() == '=') { 3222a55deb1SDavid E. O'Brien input(); yylval.i = POWEQ; RET(ASGNOP); 3232a55deb1SDavid E. O'Brien } else 3242a55deb1SDavid E. O'Brien RET(POWER); 3252a55deb1SDavid E. O'Brien 3262a55deb1SDavid E. O'Brien case '$': 3272a55deb1SDavid E. O'Brien /* BUG: awkward, if not wrong */ 3282a55deb1SDavid E. O'Brien c = gettok(&buf, &bufsize); 3292a55deb1SDavid E. O'Brien if (isalpha(c)) { 3302a55deb1SDavid E. O'Brien if (strcmp(buf, "NF") == 0) { /* very special */ 3312a55deb1SDavid E. O'Brien unputstr("(NF)"); 3322a55deb1SDavid E. O'Brien RET(INDIRECT); 3332a55deb1SDavid E. O'Brien } 3342a55deb1SDavid E. O'Brien c = peek(); 3352a55deb1SDavid E. O'Brien if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { 3362a55deb1SDavid E. O'Brien unputstr(buf); 3372a55deb1SDavid E. O'Brien RET(INDIRECT); 3382a55deb1SDavid E. O'Brien } 3392a55deb1SDavid E. O'Brien yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); 3402a55deb1SDavid E. O'Brien RET(IVAR); 341007c6572SDag-Erling Smørgrav } else if (c == 0) { /* */ 342007c6572SDag-Erling Smørgrav SYNTAX( "unexpected end of input after $" ); 343007c6572SDag-Erling Smørgrav RET(';'); 3442a55deb1SDavid E. O'Brien } else { 3452a55deb1SDavid E. O'Brien unputstr(buf); 3462a55deb1SDavid E. O'Brien RET(INDIRECT); 3472a55deb1SDavid E. O'Brien } 3482a55deb1SDavid E. O'Brien 3492a55deb1SDavid E. O'Brien case '}': 3502a55deb1SDavid E. O'Brien if (--bracecnt < 0) 3512a55deb1SDavid E. O'Brien SYNTAX( "extra }" ); 352f39dd6a9SWarner Losh sc = true; 3532a55deb1SDavid E. O'Brien RET(';'); 3542a55deb1SDavid E. O'Brien case ']': 3552a55deb1SDavid E. O'Brien if (--brackcnt < 0) 3562a55deb1SDavid E. O'Brien SYNTAX( "extra ]" ); 3572a55deb1SDavid E. O'Brien RET(']'); 3582a55deb1SDavid E. O'Brien case ')': 3592a55deb1SDavid E. O'Brien if (--parencnt < 0) 3602a55deb1SDavid E. O'Brien SYNTAX( "extra )" ); 3612a55deb1SDavid E. O'Brien RET(')'); 3622a55deb1SDavid E. O'Brien case '{': 3632a55deb1SDavid E. O'Brien bracecnt++; 3642a55deb1SDavid E. O'Brien RET('{'); 3652a55deb1SDavid E. O'Brien case '[': 3662a55deb1SDavid E. O'Brien brackcnt++; 3672a55deb1SDavid E. O'Brien RET('['); 3682a55deb1SDavid E. O'Brien case '(': 3692a55deb1SDavid E. O'Brien parencnt++; 3702a55deb1SDavid E. O'Brien RET('('); 3712a55deb1SDavid E. O'Brien 3722a55deb1SDavid E. O'Brien case '"': 3732a55deb1SDavid E. O'Brien return string(); /* BUG: should be like tran.c ? */ 3742a55deb1SDavid E. O'Brien 3752a55deb1SDavid E. O'Brien default: 3762a55deb1SDavid E. O'Brien RET(c); 3772a55deb1SDavid E. O'Brien } 3782a55deb1SDavid E. O'Brien } 3792a55deb1SDavid E. O'Brien } 3802a55deb1SDavid E. O'Brien 381f32a6403SWarner Losh extern int runetochar(char *str, int c); 382f32a6403SWarner Losh 3832a55deb1SDavid E. O'Brien int string(void) 3842a55deb1SDavid E. O'Brien { 3852a55deb1SDavid E. O'Brien int c, n; 3862a55deb1SDavid E. O'Brien char *s, *bp; 38710ce5b99SWarner Losh static char *buf = NULL; 3882a55deb1SDavid E. O'Brien static int bufsz = 500; 3892a55deb1SDavid E. O'Brien 39010ce5b99SWarner Losh if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) 3912a55deb1SDavid E. O'Brien FATAL("out of space for strings"); 3922a55deb1SDavid E. O'Brien for (bp = buf; (c = input()) != '"'; ) { 393addad6afSRong-En Fan if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) 3942a55deb1SDavid E. O'Brien FATAL("out of space for string %.10s...", buf); 3952a55deb1SDavid E. O'Brien switch (c) { 3962a55deb1SDavid E. O'Brien case '\n': 3972a55deb1SDavid E. O'Brien case '\r': 3982a55deb1SDavid E. O'Brien case 0: 399b5253557SWarner Losh *bp = '\0'; 4002a55deb1SDavid E. O'Brien SYNTAX( "non-terminated string %.10s...", buf ); 401007c6572SDag-Erling Smørgrav if (c == 0) /* hopeless */ 402007c6572SDag-Erling Smørgrav FATAL( "giving up" ); 403b5253557SWarner Losh lineno++; 4042a55deb1SDavid E. O'Brien break; 4052a55deb1SDavid E. O'Brien case '\\': 4062a55deb1SDavid E. O'Brien c = input(); 4072a55deb1SDavid E. O'Brien switch (c) { 408f39dd6a9SWarner Losh case '\n': break; 4092a55deb1SDavid E. O'Brien case '"': *bp++ = '"'; break; 4102a55deb1SDavid E. O'Brien case 'n': *bp++ = '\n'; break; 4112a55deb1SDavid E. O'Brien case 't': *bp++ = '\t'; break; 4122a55deb1SDavid E. O'Brien case 'f': *bp++ = '\f'; break; 4132a55deb1SDavid E. O'Brien case 'r': *bp++ = '\r'; break; 4142a55deb1SDavid E. O'Brien case 'b': *bp++ = '\b'; break; 4152a55deb1SDavid E. O'Brien case 'v': *bp++ = '\v'; break; 416f39dd6a9SWarner Losh case 'a': *bp++ = '\a'; break; 4172a55deb1SDavid E. O'Brien case '\\': *bp++ = '\\'; break; 4182a55deb1SDavid E. O'Brien 4192a55deb1SDavid E. O'Brien case '0': case '1': case '2': /* octal: \d \dd \ddd */ 4202a55deb1SDavid E. O'Brien case '3': case '4': case '5': case '6': case '7': 4212a55deb1SDavid E. O'Brien n = c - '0'; 4222a55deb1SDavid E. O'Brien if ((c = peek()) >= '0' && c < '8') { 4232a55deb1SDavid E. O'Brien n = 8 * n + input() - '0'; 4242a55deb1SDavid E. O'Brien if ((c = peek()) >= '0' && c < '8') 4252a55deb1SDavid E. O'Brien n = 8 * n + input() - '0'; 4262a55deb1SDavid E. O'Brien } 4272a55deb1SDavid E. O'Brien *bp++ = n; 4282a55deb1SDavid E. O'Brien break; 4292a55deb1SDavid E. O'Brien 430f32a6403SWarner Losh case 'x': /* hex \x0-9a-fA-F (exactly two) */ 431f32a6403SWarner Losh { 432f32a6403SWarner Losh int i; 433f32a6403SWarner Losh 434f32a6403SWarner Losh if (!isxdigit(peek())) { 435f32a6403SWarner Losh unput(c); 4362a55deb1SDavid E. O'Brien break; 4372a55deb1SDavid E. O'Brien } 438f32a6403SWarner Losh n = 0; 439f32a6403SWarner Losh for (i = 0; i < 2; i++) { 440f32a6403SWarner Losh c = input(); 441f32a6403SWarner Losh if (c == 0) 442f32a6403SWarner Losh break; 443f32a6403SWarner Losh if (isxdigit(c)) { 444f32a6403SWarner Losh c = tolower(c); 445f32a6403SWarner Losh n *= 16; 446f32a6403SWarner Losh if (isdigit(c)) 447f32a6403SWarner Losh n += (c - '0'); 448f32a6403SWarner Losh else 449f32a6403SWarner Losh n += 10 + (c - 'a'); 450f32a6403SWarner Losh } else { 451b2376a5fSWarner Losh unput(c); 452f32a6403SWarner Losh break; 453f32a6403SWarner Losh } 454f32a6403SWarner Losh } 455f32a6403SWarner Losh if (i) 4562a55deb1SDavid E. O'Brien *bp++ = n; 4572a55deb1SDavid E. O'Brien break; 4582a55deb1SDavid E. O'Brien } 4592a55deb1SDavid E. O'Brien 460f32a6403SWarner Losh case 'u': /* utf \u0-9a-fA-F (1..8) */ 461f32a6403SWarner Losh { 462f32a6403SWarner Losh int i; 463f32a6403SWarner Losh 464f32a6403SWarner Losh n = 0; 465f32a6403SWarner Losh for (i = 0; i < 8; i++) { 466f32a6403SWarner Losh c = input(); 467f32a6403SWarner Losh if (!isxdigit(c) || c == 0) 468f32a6403SWarner Losh break; 469f32a6403SWarner Losh c = tolower(c); 470f32a6403SWarner Losh n *= 16; 471f32a6403SWarner Losh if (isdigit(c)) 472f32a6403SWarner Losh n += (c - '0'); 473f32a6403SWarner Losh else 474f32a6403SWarner Losh n += 10 + (c - 'a'); 475f32a6403SWarner Losh } 476f32a6403SWarner Losh unput(c); 477f32a6403SWarner Losh bp += runetochar(bp, n); 478f32a6403SWarner Losh break; 479f32a6403SWarner Losh } 480f32a6403SWarner Losh 4812a55deb1SDavid E. O'Brien default: 4822a55deb1SDavid E. O'Brien *bp++ = c; 4832a55deb1SDavid E. O'Brien break; 4842a55deb1SDavid E. O'Brien } 4852a55deb1SDavid E. O'Brien break; 4862a55deb1SDavid E. O'Brien default: 4872a55deb1SDavid E. O'Brien *bp++ = c; 4882a55deb1SDavid E. O'Brien break; 4892a55deb1SDavid E. O'Brien } 4902a55deb1SDavid E. O'Brien } 4912a55deb1SDavid E. O'Brien *bp = 0; 4922a55deb1SDavid E. O'Brien s = tostring(buf); 493f39dd6a9SWarner Losh *bp++ = ' '; *bp++ = '\0'; 4942a55deb1SDavid E. O'Brien yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); 495f39dd6a9SWarner Losh free(s); 4962a55deb1SDavid E. O'Brien RET(STRING); 4972a55deb1SDavid E. O'Brien } 4982a55deb1SDavid E. O'Brien 4992a55deb1SDavid E. O'Brien 500f39dd6a9SWarner Losh static int binsearch(char *w, const Keyword *kp, int n) 5012a55deb1SDavid E. O'Brien { 5022a55deb1SDavid E. O'Brien int cond, low, mid, high; 5032a55deb1SDavid E. O'Brien 5042a55deb1SDavid E. O'Brien low = 0; 5052a55deb1SDavid E. O'Brien high = n - 1; 5062a55deb1SDavid E. O'Brien while (low <= high) { 5072a55deb1SDavid E. O'Brien mid = (low + high) / 2; 5082a55deb1SDavid E. O'Brien if ((cond = strcmp(w, kp[mid].word)) < 0) 5092a55deb1SDavid E. O'Brien high = mid - 1; 5102a55deb1SDavid E. O'Brien else if (cond > 0) 5112a55deb1SDavid E. O'Brien low = mid + 1; 5122a55deb1SDavid E. O'Brien else 5132a55deb1SDavid E. O'Brien return mid; 5142a55deb1SDavid E. O'Brien } 5152a55deb1SDavid E. O'Brien return -1; 5162a55deb1SDavid E. O'Brien } 5172a55deb1SDavid E. O'Brien 5182a55deb1SDavid E. O'Brien int word(char *w) 5192a55deb1SDavid E. O'Brien { 520f39dd6a9SWarner Losh const Keyword *kp; 5212a55deb1SDavid E. O'Brien int c, n; 5222a55deb1SDavid E. O'Brien 5232a55deb1SDavid E. O'Brien n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); 5242a55deb1SDavid E. O'Brien if (n != -1) { /* found in table */ 52511169460SAlex Richardson kp = keywords + n; 5262a55deb1SDavid E. O'Brien yylval.i = kp->sub; 5272a55deb1SDavid E. O'Brien switch (kp->type) { /* special handling */ 528addad6afSRong-En Fan case BLTIN: 529addad6afSRong-En Fan if (kp->sub == FSYSTEM && safe) 5302a55deb1SDavid E. O'Brien SYNTAX( "system is unsafe" ); 5312a55deb1SDavid E. O'Brien RET(kp->type); 5322a55deb1SDavid E. O'Brien case FUNC: 5332a55deb1SDavid E. O'Brien if (infunc) 5342a55deb1SDavid E. O'Brien SYNTAX( "illegal nested function" ); 5352a55deb1SDavid E. O'Brien RET(kp->type); 5362a55deb1SDavid E. O'Brien case RETURN: 5372a55deb1SDavid E. O'Brien if (!infunc) 5382a55deb1SDavid E. O'Brien SYNTAX( "return not in function" ); 5392a55deb1SDavid E. O'Brien RET(kp->type); 5402a55deb1SDavid E. O'Brien case VARNF: 5412a55deb1SDavid E. O'Brien yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); 5422a55deb1SDavid E. O'Brien RET(VARNF); 5432a55deb1SDavid E. O'Brien default: 5442a55deb1SDavid E. O'Brien RET(kp->type); 5452a55deb1SDavid E. O'Brien } 5462a55deb1SDavid E. O'Brien } 5472a55deb1SDavid E. O'Brien c = peek(); /* look for '(' */ 5482a55deb1SDavid E. O'Brien if (c != '(' && infunc && (n=isarg(w)) >= 0) { 5492a55deb1SDavid E. O'Brien yylval.i = n; 5502a55deb1SDavid E. O'Brien RET(ARG); 5512a55deb1SDavid E. O'Brien } else { 5522a55deb1SDavid E. O'Brien yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); 5532a55deb1SDavid E. O'Brien if (c == '(') { 5542a55deb1SDavid E. O'Brien RET(CALL); 5552a55deb1SDavid E. O'Brien } else { 5562a55deb1SDavid E. O'Brien RET(VAR); 5572a55deb1SDavid E. O'Brien } 5582a55deb1SDavid E. O'Brien } 5592a55deb1SDavid E. O'Brien } 5602a55deb1SDavid E. O'Brien 561813da98dSDavid E. O'Brien void startreg(void) /* next call to yylex will return a regular expression */ 5622a55deb1SDavid E. O'Brien { 563f39dd6a9SWarner Losh reg = true; 5642a55deb1SDavid E. O'Brien } 5652a55deb1SDavid E. O'Brien 5662a55deb1SDavid E. O'Brien int regexpr(void) 5672a55deb1SDavid E. O'Brien { 5682a55deb1SDavid E. O'Brien int c; 56910ce5b99SWarner Losh static char *buf = NULL; 5702a55deb1SDavid E. O'Brien static int bufsz = 500; 5712a55deb1SDavid E. O'Brien char *bp; 5722a55deb1SDavid E. O'Brien 57310ce5b99SWarner Losh if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) 574f32a6403SWarner Losh FATAL("out of space for reg expr"); 5752a55deb1SDavid E. O'Brien bp = buf; 5762a55deb1SDavid E. O'Brien for ( ; (c = input()) != '/' && c != 0; ) { 577addad6afSRong-En Fan if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) 5782a55deb1SDavid E. O'Brien FATAL("out of space for reg expr %.10s...", buf); 5792a55deb1SDavid E. O'Brien if (c == '\n') { 580b5253557SWarner Losh *bp = '\0'; 5812a55deb1SDavid E. O'Brien SYNTAX( "newline in regular expression %.10s...", buf ); 5822a55deb1SDavid E. O'Brien unput('\n'); 5832a55deb1SDavid E. O'Brien break; 5842a55deb1SDavid E. O'Brien } else if (c == '\\') { 5852a55deb1SDavid E. O'Brien *bp++ = '\\'; 5862a55deb1SDavid E. O'Brien *bp++ = input(); 5872a55deb1SDavid E. O'Brien } else { 5882a55deb1SDavid E. O'Brien *bp++ = c; 5892a55deb1SDavid E. O'Brien } 5902a55deb1SDavid E. O'Brien } 5912a55deb1SDavid E. O'Brien *bp = 0; 59288b8d487SRuslan Ermilov if (c == 0) 59388b8d487SRuslan Ermilov SYNTAX("non-terminated regular expression %.10s...", buf); 5942a55deb1SDavid E. O'Brien yylval.s = tostring(buf); 5952a55deb1SDavid E. O'Brien unput('/'); 5962a55deb1SDavid E. O'Brien RET(REGEXPR); 5972a55deb1SDavid E. O'Brien } 5982a55deb1SDavid E. O'Brien 5992a55deb1SDavid E. O'Brien /* low-level lexical stuff, sort of inherited from lex */ 6002a55deb1SDavid E. O'Brien 6012a55deb1SDavid E. O'Brien char ebuf[300]; 6022a55deb1SDavid E. O'Brien char *ep = ebuf; 6032a55deb1SDavid E. O'Brien char yysbuf[100]; /* pushback buffer */ 6042a55deb1SDavid E. O'Brien char *yysptr = yysbuf; 60510ce5b99SWarner Losh FILE *yyin = NULL; 6062a55deb1SDavid E. O'Brien 6072a55deb1SDavid E. O'Brien int input(void) /* get next lexical input character */ 6082a55deb1SDavid E. O'Brien { 6092a55deb1SDavid E. O'Brien int c; 6102a55deb1SDavid E. O'Brien extern char *lexprog; 6112a55deb1SDavid E. O'Brien 6122a55deb1SDavid E. O'Brien if (yysptr > yysbuf) 6132e454f23SRuslan Ermilov c = (uschar)*--yysptr; 6142a55deb1SDavid E. O'Brien else if (lexprog != NULL) { /* awk '...' */ 6152e454f23SRuslan Ermilov if ((c = (uschar)*lexprog) != 0) 6162a55deb1SDavid E. O'Brien lexprog++; 6172a55deb1SDavid E. O'Brien } else /* awk -f ... */ 6182a55deb1SDavid E. O'Brien c = pgetc(); 619b5253557SWarner Losh if (c == EOF) 6202a55deb1SDavid E. O'Brien c = 0; 6212a55deb1SDavid E. O'Brien if (ep >= ebuf + sizeof ebuf) 6222a55deb1SDavid E. O'Brien ep = ebuf; 623b5253557SWarner Losh *ep = c; 624b5253557SWarner Losh if (c != 0) { 625b5253557SWarner Losh ep++; 626b5253557SWarner Losh } 627b5253557SWarner Losh return (c); 6282a55deb1SDavid E. O'Brien } 6292a55deb1SDavid E. O'Brien 6302a55deb1SDavid E. O'Brien void unput(int c) /* put lexical character back on input */ 6312a55deb1SDavid E. O'Brien { 632f39dd6a9SWarner Losh if (c == '\n') 633f39dd6a9SWarner Losh lineno--; 6342a55deb1SDavid E. O'Brien if (yysptr >= yysbuf + sizeof(yysbuf)) 6352a55deb1SDavid E. O'Brien FATAL("pushed back too much: %.20s...", yysbuf); 6362a55deb1SDavid E. O'Brien *yysptr++ = c; 6372a55deb1SDavid E. O'Brien if (--ep < ebuf) 6382a55deb1SDavid E. O'Brien ep = ebuf + sizeof(ebuf) - 1; 6392a55deb1SDavid E. O'Brien } 6402a55deb1SDavid E. O'Brien 641813da98dSDavid E. O'Brien void unputstr(const char *s) /* put a string back on input */ 6422a55deb1SDavid E. O'Brien { 6432a55deb1SDavid E. O'Brien int i; 6442a55deb1SDavid E. O'Brien 6452a55deb1SDavid E. O'Brien for (i = strlen(s)-1; i >= 0; i--) 6462a55deb1SDavid E. O'Brien unput(s[i]); 6472a55deb1SDavid E. O'Brien } 648