/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
244b588458SPeter Avalos
254b588458SPeter Avalos #include <stdio.h>
264b588458SPeter Avalos #include <stdlib.h>
274b588458SPeter Avalos #include <string.h>
284b588458SPeter Avalos #include <ctype.h>
294b588458SPeter Avalos #include "awk.h"
3048f09a05SAntonio Huete Jimenez #include "awkgram.tab.h"
314b588458SPeter Avalos
324b588458SPeter Avalos extern YYSTYPE yylval;
331d48fce0SDaniel Fojt extern bool infunc;
344b588458SPeter Avalos
354b588458SPeter Avalos int lineno = 1;
364b588458SPeter Avalos int bracecnt = 0;
374b588458SPeter Avalos int brackcnt = 0;
384b588458SPeter Avalos int parencnt = 0;
394b588458SPeter Avalos
/*
 * One reserved word.  word() stores `sub` in yylval.i and returns `type`
 * to the parser as the token code.
 */
typedef struct Keyword {
	const char	*word;	/* spelling, compared with strcmp() */
	int		sub;	/* value placed in yylval.i */
	int		type;	/* token code returned for this word */
} Keyword;
454b588458SPeter Avalos
461d48fce0SDaniel Fojt const Keyword keywords[] = { /* keep sorted: binary searched */
474b588458SPeter Avalos { "BEGIN", XBEGIN, XBEGIN },
484b588458SPeter Avalos { "END", XEND, XEND },
494b588458SPeter Avalos { "NF", VARNF, VARNF },
504b588458SPeter Avalos { "atan2", FATAN, BLTIN },
514b588458SPeter Avalos { "break", BREAK, BREAK },
524b588458SPeter Avalos { "close", CLOSE, CLOSE },
534b588458SPeter Avalos { "continue", CONTINUE, CONTINUE },
544b588458SPeter Avalos { "cos", FCOS, BLTIN },
554b588458SPeter Avalos { "delete", DELETE, DELETE },
564b588458SPeter Avalos { "do", DO, DO },
574b588458SPeter Avalos { "else", ELSE, ELSE },
584b588458SPeter Avalos { "exit", EXIT, EXIT },
594b588458SPeter Avalos { "exp", FEXP, BLTIN },
604b588458SPeter Avalos { "fflush", FFLUSH, BLTIN },
614b588458SPeter Avalos { "for", FOR, FOR },
624b588458SPeter Avalos { "func", FUNC, FUNC },
634b588458SPeter Avalos { "function", FUNC, FUNC },
644b588458SPeter Avalos { "getline", GETLINE, GETLINE },
654b588458SPeter Avalos { "gsub", GSUB, GSUB },
664b588458SPeter Avalos { "if", IF, IF },
674b588458SPeter Avalos { "in", IN, IN },
684b588458SPeter Avalos { "index", INDEX, INDEX },
694b588458SPeter Avalos { "int", FINT, BLTIN },
704b588458SPeter Avalos { "length", FLENGTH, BLTIN },
714b588458SPeter Avalos { "log", FLOG, BLTIN },
724b588458SPeter Avalos { "match", MATCHFCN, MATCHFCN },
734b588458SPeter Avalos { "next", NEXT, NEXT },
744b588458SPeter Avalos { "nextfile", NEXTFILE, NEXTFILE },
754b588458SPeter Avalos { "print", PRINT, PRINT },
764b588458SPeter Avalos { "printf", PRINTF, PRINTF },
774b588458SPeter Avalos { "rand", FRAND, BLTIN },
784b588458SPeter Avalos { "return", RETURN, RETURN },
794b588458SPeter Avalos { "sin", FSIN, BLTIN },
804b588458SPeter Avalos { "split", SPLIT, SPLIT },
814b588458SPeter Avalos { "sprintf", SPRINTF, SPRINTF },
824b588458SPeter Avalos { "sqrt", FSQRT, BLTIN },
834b588458SPeter Avalos { "srand", FSRAND, BLTIN },
844b588458SPeter Avalos { "sub", SUB, SUB },
854b588458SPeter Avalos { "substr", SUBSTR, SUBSTR },
864b588458SPeter Avalos { "system", FSYSTEM, BLTIN },
874b588458SPeter Avalos { "tolower", FTOLOWER, BLTIN },
884b588458SPeter Avalos { "toupper", FTOUPPER, BLTIN },
894b588458SPeter Avalos { "while", WHILE, WHILE },
904b588458SPeter Avalos };
914b588458SPeter Avalos
/*
 * RET(x): return token x from the enclosing int-returning function, first
 * printing its name when dbg is set.  Wrapped in do/while(0) so that
 * "RET(x);" is a single statement in any context (including an unbraced
 * if/else), and x is evaluated exactly once.
 */
#define	RET(x)	do { \
		int ret_tok_ = (x); \
		if (dbg) \
			printf("lex %s\n", tokname(ret_tok_)); \
		return ret_tok_; \
	} while (0)
934b588458SPeter Avalos
/*
 * peek - report the next input character without consuming it.
 * Implemented as a read followed by an immediate pushback.
 */
static int peek(void)
{
	const int next = input();

	unput(next);
	return next;
}
1004b588458SPeter Avalos
gettok(char ** pbuf,int * psz)1011d48fce0SDaniel Fojt static int gettok(char **pbuf, int *psz) /* get next input token */
1024b588458SPeter Avalos {
1034b588458SPeter Avalos int c, retc;
1044b588458SPeter Avalos char *buf = *pbuf;
1054b588458SPeter Avalos int sz = *psz;
1064b588458SPeter Avalos char *bp = buf;
1074b588458SPeter Avalos
1084b588458SPeter Avalos c = input();
1094b588458SPeter Avalos if (c == 0)
1104b588458SPeter Avalos return 0;
1114b588458SPeter Avalos buf[0] = c;
1124b588458SPeter Avalos buf[1] = 0;
1134b588458SPeter Avalos if (!isalnum(c) && c != '.' && c != '_')
1144b588458SPeter Avalos return c;
1154b588458SPeter Avalos
1164b588458SPeter Avalos *bp++ = c;
1174b588458SPeter Avalos if (isalpha(c) || c == '_') { /* it's a varname */
1184b588458SPeter Avalos for ( ; (c = input()) != 0; ) {
1194b588458SPeter Avalos if (bp-buf >= sz)
1204b588458SPeter Avalos if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1214b588458SPeter Avalos FATAL( "out of space for name %.10s...", buf );
1224b588458SPeter Avalos if (isalnum(c) || c == '_')
1234b588458SPeter Avalos *bp++ = c;
1244b588458SPeter Avalos else {
1254b588458SPeter Avalos *bp = 0;
1264b588458SPeter Avalos unput(c);
1274b588458SPeter Avalos break;
1284b588458SPeter Avalos }
1294b588458SPeter Avalos }
1304b588458SPeter Avalos *bp = 0;
1314b588458SPeter Avalos retc = 'a'; /* alphanumeric */
1324b588458SPeter Avalos } else { /* maybe it's a number, but could be . */
1334b588458SPeter Avalos char *rem;
1344b588458SPeter Avalos /* read input until can't be a number */
1354b588458SPeter Avalos for ( ; (c = input()) != 0; ) {
1364b588458SPeter Avalos if (bp-buf >= sz)
1374b588458SPeter Avalos if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
1384b588458SPeter Avalos FATAL( "out of space for number %.10s...", buf );
1394b588458SPeter Avalos if (isdigit(c) || c == 'e' || c == 'E'
1404b588458SPeter Avalos || c == '.' || c == '+' || c == '-')
1414b588458SPeter Avalos *bp++ = c;
1424b588458SPeter Avalos else {
1434b588458SPeter Avalos unput(c);
1444b588458SPeter Avalos break;
1454b588458SPeter Avalos }
1464b588458SPeter Avalos }
1474b588458SPeter Avalos *bp = 0;
1484b588458SPeter Avalos strtod(buf, &rem); /* parse the number */
1494b588458SPeter Avalos if (rem == buf) { /* it wasn't a valid number at all */
1504b588458SPeter Avalos buf[1] = 0; /* return one character as token */
15148f09a05SAntonio Huete Jimenez retc = (uschar)buf[0]; /* character is its own type */
1524b588458SPeter Avalos unputstr(rem+1); /* put rest back for later */
1534b588458SPeter Avalos } else { /* some prefix was a number */
1544b588458SPeter Avalos unputstr(rem); /* put rest back for later */
1554b588458SPeter Avalos rem[0] = 0; /* truncate buf after number part */
1564b588458SPeter Avalos retc = '0'; /* type is number */
1574b588458SPeter Avalos }
1584b588458SPeter Avalos }
1594b588458SPeter Avalos *pbuf = buf;
1604b588458SPeter Avalos *psz = sz;
1614b588458SPeter Avalos return retc;
1624b588458SPeter Avalos }
1634b588458SPeter Avalos
/* Forward declarations for the scanners yylex() dispatches to. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags examined at the top of yylex(). */
bool	sc = false;	/* true => return a } right now */
bool	reg = false;	/* true => return a REGEXPR now */
1694b588458SPeter Avalos
yylex(void)1704b588458SPeter Avalos int yylex(void)
1714b588458SPeter Avalos {
1724b588458SPeter Avalos int c;
1731d48fce0SDaniel Fojt static char *buf = NULL;
1744b588458SPeter Avalos static int bufsize = 5; /* BUG: setting this small causes core dump! */
1754b588458SPeter Avalos
17648f09a05SAntonio Huete Jimenez if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL)
1774b588458SPeter Avalos FATAL( "out of space in yylex" );
1784b588458SPeter Avalos if (sc) {
1791d48fce0SDaniel Fojt sc = false;
1804b588458SPeter Avalos RET('}');
1814b588458SPeter Avalos }
1824b588458SPeter Avalos if (reg) {
1831d48fce0SDaniel Fojt reg = false;
1844b588458SPeter Avalos return regexpr();
1854b588458SPeter Avalos }
1864b588458SPeter Avalos for (;;) {
1874b588458SPeter Avalos c = gettok(&buf, &bufsize);
1884b588458SPeter Avalos if (c == 0)
1894b588458SPeter Avalos return 0;
1904b588458SPeter Avalos if (isalpha(c) || c == '_')
1914b588458SPeter Avalos return word(buf);
1924b588458SPeter Avalos if (isdigit(c)) {
1931d48fce0SDaniel Fojt char *cp = tostring(buf);
19448f09a05SAntonio Huete Jimenez double result;
19548f09a05SAntonio Huete Jimenez
19648f09a05SAntonio Huete Jimenez if (is_number(cp, & result))
19748f09a05SAntonio Huete Jimenez yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab);
19848f09a05SAntonio Huete Jimenez else
19948f09a05SAntonio Huete Jimenez yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab);
2001d48fce0SDaniel Fojt free(cp);
2014b588458SPeter Avalos /* should this also have STR set? */
2024b588458SPeter Avalos RET(NUMBER);
2034b588458SPeter Avalos }
2044b588458SPeter Avalos
2054b588458SPeter Avalos yylval.i = c;
2064b588458SPeter Avalos switch (c) {
2074b588458SPeter Avalos case '\n': /* {EOL} */
2081d48fce0SDaniel Fojt lineno++;
2094b588458SPeter Avalos RET(NL);
2104b588458SPeter Avalos case '\r': /* assume \n is coming */
2114b588458SPeter Avalos case ' ': /* {WS}+ */
2124b588458SPeter Avalos case '\t':
2134b588458SPeter Avalos break;
2144b588458SPeter Avalos case '#': /* #.* strip comments */
2154b588458SPeter Avalos while ((c = input()) != '\n' && c != 0)
2164b588458SPeter Avalos ;
2174b588458SPeter Avalos unput(c);
2181d48fce0SDaniel Fojt /*
2191d48fce0SDaniel Fojt * Next line is a hack, itcompensates for
2201d48fce0SDaniel Fojt * unput's treatment of \n.
2211d48fce0SDaniel Fojt */
2221d48fce0SDaniel Fojt lineno++;
2234b588458SPeter Avalos break;
2244b588458SPeter Avalos case ';':
2254b588458SPeter Avalos RET(';');
2264b588458SPeter Avalos case '\\':
2274b588458SPeter Avalos if (peek() == '\n') {
2284b588458SPeter Avalos input();
2291d48fce0SDaniel Fojt lineno++;
2304b588458SPeter Avalos } else if (peek() == '\r') {
2314b588458SPeter Avalos input(); input(); /* \n */
2324b588458SPeter Avalos lineno++;
2334b588458SPeter Avalos } else {
2344b588458SPeter Avalos RET(c);
2354b588458SPeter Avalos }
2364b588458SPeter Avalos break;
2374b588458SPeter Avalos case '&':
2384b588458SPeter Avalos if (peek() == '&') {
2394b588458SPeter Avalos input(); RET(AND);
2404b588458SPeter Avalos } else
2414b588458SPeter Avalos RET('&');
2424b588458SPeter Avalos case '|':
2434b588458SPeter Avalos if (peek() == '|') {
2444b588458SPeter Avalos input(); RET(BOR);
2454b588458SPeter Avalos } else
2464b588458SPeter Avalos RET('|');
2474b588458SPeter Avalos case '!':
2484b588458SPeter Avalos if (peek() == '=') {
2494b588458SPeter Avalos input(); yylval.i = NE; RET(NE);
2504b588458SPeter Avalos } else if (peek() == '~') {
2514b588458SPeter Avalos input(); yylval.i = NOTMATCH; RET(MATCHOP);
2524b588458SPeter Avalos } else
2534b588458SPeter Avalos RET(NOT);
2544b588458SPeter Avalos case '~':
2554b588458SPeter Avalos yylval.i = MATCH;
2564b588458SPeter Avalos RET(MATCHOP);
2574b588458SPeter Avalos case '<':
2584b588458SPeter Avalos if (peek() == '=') {
2594b588458SPeter Avalos input(); yylval.i = LE; RET(LE);
2604b588458SPeter Avalos } else {
2614b588458SPeter Avalos yylval.i = LT; RET(LT);
2624b588458SPeter Avalos }
2634b588458SPeter Avalos case '=':
2644b588458SPeter Avalos if (peek() == '=') {
2654b588458SPeter Avalos input(); yylval.i = EQ; RET(EQ);
2664b588458SPeter Avalos } else {
2674b588458SPeter Avalos yylval.i = ASSIGN; RET(ASGNOP);
2684b588458SPeter Avalos }
2694b588458SPeter Avalos case '>':
2704b588458SPeter Avalos if (peek() == '=') {
2714b588458SPeter Avalos input(); yylval.i = GE; RET(GE);
2724b588458SPeter Avalos } else if (peek() == '>') {
2734b588458SPeter Avalos input(); yylval.i = APPEND; RET(APPEND);
2744b588458SPeter Avalos } else {
2754b588458SPeter Avalos yylval.i = GT; RET(GT);
2764b588458SPeter Avalos }
2774b588458SPeter Avalos case '+':
2784b588458SPeter Avalos if (peek() == '+') {
2794b588458SPeter Avalos input(); yylval.i = INCR; RET(INCR);
2804b588458SPeter Avalos } else if (peek() == '=') {
2814b588458SPeter Avalos input(); yylval.i = ADDEQ; RET(ASGNOP);
2824b588458SPeter Avalos } else
2834b588458SPeter Avalos RET('+');
2844b588458SPeter Avalos case '-':
2854b588458SPeter Avalos if (peek() == '-') {
2864b588458SPeter Avalos input(); yylval.i = DECR; RET(DECR);
2874b588458SPeter Avalos } else if (peek() == '=') {
2884b588458SPeter Avalos input(); yylval.i = SUBEQ; RET(ASGNOP);
2894b588458SPeter Avalos } else
2904b588458SPeter Avalos RET('-');
2914b588458SPeter Avalos case '*':
2924b588458SPeter Avalos if (peek() == '=') { /* *= */
2934b588458SPeter Avalos input(); yylval.i = MULTEQ; RET(ASGNOP);
2944b588458SPeter Avalos } else if (peek() == '*') { /* ** or **= */
2954b588458SPeter Avalos input(); /* eat 2nd * */
2964b588458SPeter Avalos if (peek() == '=') {
2974b588458SPeter Avalos input(); yylval.i = POWEQ; RET(ASGNOP);
2984b588458SPeter Avalos } else {
2994b588458SPeter Avalos RET(POWER);
3004b588458SPeter Avalos }
3014b588458SPeter Avalos } else
3024b588458SPeter Avalos RET('*');
3034b588458SPeter Avalos case '/':
3044b588458SPeter Avalos RET('/');
3054b588458SPeter Avalos case '%':
3064b588458SPeter Avalos if (peek() == '=') {
3074b588458SPeter Avalos input(); yylval.i = MODEQ; RET(ASGNOP);
3084b588458SPeter Avalos } else
3094b588458SPeter Avalos RET('%');
3104b588458SPeter Avalos case '^':
3114b588458SPeter Avalos if (peek() == '=') {
3124b588458SPeter Avalos input(); yylval.i = POWEQ; RET(ASGNOP);
3134b588458SPeter Avalos } else
3144b588458SPeter Avalos RET(POWER);
3154b588458SPeter Avalos
3164b588458SPeter Avalos case '$':
3174b588458SPeter Avalos /* BUG: awkward, if not wrong */
3184b588458SPeter Avalos c = gettok(&buf, &bufsize);
3194b588458SPeter Avalos if (isalpha(c)) {
3204b588458SPeter Avalos if (strcmp(buf, "NF") == 0) { /* very special */
3214b588458SPeter Avalos unputstr("(NF)");
3224b588458SPeter Avalos RET(INDIRECT);
3234b588458SPeter Avalos }
3244b588458SPeter Avalos c = peek();
3254b588458SPeter Avalos if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
3264b588458SPeter Avalos unputstr(buf);
3274b588458SPeter Avalos RET(INDIRECT);
3284b588458SPeter Avalos }
3294b588458SPeter Avalos yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
3304b588458SPeter Avalos RET(IVAR);
3314b588458SPeter Avalos } else if (c == 0) { /* */
3324b588458SPeter Avalos SYNTAX( "unexpected end of input after $" );
3334b588458SPeter Avalos RET(';');
3344b588458SPeter Avalos } else {
3354b588458SPeter Avalos unputstr(buf);
3364b588458SPeter Avalos RET(INDIRECT);
3374b588458SPeter Avalos }
3384b588458SPeter Avalos
3394b588458SPeter Avalos case '}':
3404b588458SPeter Avalos if (--bracecnt < 0)
3414b588458SPeter Avalos SYNTAX( "extra }" );
3421d48fce0SDaniel Fojt sc = true;
3434b588458SPeter Avalos RET(';');
3444b588458SPeter Avalos case ']':
3454b588458SPeter Avalos if (--brackcnt < 0)
3464b588458SPeter Avalos SYNTAX( "extra ]" );
3474b588458SPeter Avalos RET(']');
3484b588458SPeter Avalos case ')':
3494b588458SPeter Avalos if (--parencnt < 0)
3504b588458SPeter Avalos SYNTAX( "extra )" );
3514b588458SPeter Avalos RET(')');
3524b588458SPeter Avalos case '{':
3534b588458SPeter Avalos bracecnt++;
3544b588458SPeter Avalos RET('{');
3554b588458SPeter Avalos case '[':
3564b588458SPeter Avalos brackcnt++;
3574b588458SPeter Avalos RET('[');
3584b588458SPeter Avalos case '(':
3594b588458SPeter Avalos parencnt++;
3604b588458SPeter Avalos RET('(');
3614b588458SPeter Avalos
3624b588458SPeter Avalos case '"':
3634b588458SPeter Avalos return string(); /* BUG: should be like tran.c ? */
3644b588458SPeter Avalos
3654b588458SPeter Avalos default:
3664b588458SPeter Avalos RET(c);
3674b588458SPeter Avalos }
3684b588458SPeter Avalos }
3694b588458SPeter Avalos }
3704b588458SPeter Avalos
371*ed569bc2SAaron LI extern int runetochar(char *str, int c);
372*ed569bc2SAaron LI
string(void)3734b588458SPeter Avalos int string(void)
3744b588458SPeter Avalos {
3754b588458SPeter Avalos int c, n;
3764b588458SPeter Avalos char *s, *bp;
3771d48fce0SDaniel Fojt static char *buf = NULL;
3784b588458SPeter Avalos static int bufsz = 500;
3794b588458SPeter Avalos
38048f09a05SAntonio Huete Jimenez if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
3814b588458SPeter Avalos FATAL("out of space for strings");
3824b588458SPeter Avalos for (bp = buf; (c = input()) != '"'; ) {
3834b588458SPeter Avalos if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
3844b588458SPeter Avalos FATAL("out of space for string %.10s...", buf);
3854b588458SPeter Avalos switch (c) {
3864b588458SPeter Avalos case '\n':
3874b588458SPeter Avalos case '\r':
3884b588458SPeter Avalos case 0:
3891d48fce0SDaniel Fojt *bp = '\0';
3904b588458SPeter Avalos SYNTAX( "non-terminated string %.10s...", buf );
3914b588458SPeter Avalos if (c == 0) /* hopeless */
3924b588458SPeter Avalos FATAL( "giving up" );
3931d48fce0SDaniel Fojt lineno++;
3944b588458SPeter Avalos break;
3954b588458SPeter Avalos case '\\':
3964b588458SPeter Avalos c = input();
3974b588458SPeter Avalos switch (c) {
3981d48fce0SDaniel Fojt case '\n': break;
3994b588458SPeter Avalos case '"': *bp++ = '"'; break;
4004b588458SPeter Avalos case 'n': *bp++ = '\n'; break;
4014b588458SPeter Avalos case 't': *bp++ = '\t'; break;
4024b588458SPeter Avalos case 'f': *bp++ = '\f'; break;
4034b588458SPeter Avalos case 'r': *bp++ = '\r'; break;
4044b588458SPeter Avalos case 'b': *bp++ = '\b'; break;
4054b588458SPeter Avalos case 'v': *bp++ = '\v'; break;
4061d48fce0SDaniel Fojt case 'a': *bp++ = '\a'; break;
4074b588458SPeter Avalos case '\\': *bp++ = '\\'; break;
4084b588458SPeter Avalos
4094b588458SPeter Avalos case '0': case '1': case '2': /* octal: \d \dd \ddd */
4104b588458SPeter Avalos case '3': case '4': case '5': case '6': case '7':
4114b588458SPeter Avalos n = c - '0';
4124b588458SPeter Avalos if ((c = peek()) >= '0' && c < '8') {
4134b588458SPeter Avalos n = 8 * n + input() - '0';
4144b588458SPeter Avalos if ((c = peek()) >= '0' && c < '8')
4154b588458SPeter Avalos n = 8 * n + input() - '0';
4164b588458SPeter Avalos }
4174b588458SPeter Avalos *bp++ = n;
4184b588458SPeter Avalos break;
4194b588458SPeter Avalos
420*ed569bc2SAaron LI case 'x': /* hex \x0-9a-fA-F (exactly two) */
421*ed569bc2SAaron LI {
422*ed569bc2SAaron LI int i;
423*ed569bc2SAaron LI
424*ed569bc2SAaron LI if (!isxdigit(peek())) {
425*ed569bc2SAaron LI unput(c);
4264b588458SPeter Avalos break;
4274b588458SPeter Avalos }
428*ed569bc2SAaron LI n = 0;
429*ed569bc2SAaron LI for (i = 0; i < 2; i++) {
430*ed569bc2SAaron LI c = input();
431*ed569bc2SAaron LI if (c == 0)
432*ed569bc2SAaron LI break;
433*ed569bc2SAaron LI if (isxdigit(c)) {
434*ed569bc2SAaron LI c = tolower(c);
435*ed569bc2SAaron LI n *= 16;
436*ed569bc2SAaron LI if (isdigit(c))
437*ed569bc2SAaron LI n += (c - '0');
438*ed569bc2SAaron LI else
439*ed569bc2SAaron LI n += 10 + (c - 'a');
440*ed569bc2SAaron LI } else {
4414b588458SPeter Avalos unput(c);
442*ed569bc2SAaron LI break;
443*ed569bc2SAaron LI }
444*ed569bc2SAaron LI }
445*ed569bc2SAaron LI if (i)
4464b588458SPeter Avalos *bp++ = n;
4474b588458SPeter Avalos break;
4484b588458SPeter Avalos }
4494b588458SPeter Avalos
450*ed569bc2SAaron LI case 'u': /* utf \u0-9a-fA-F (1..8) */
451*ed569bc2SAaron LI {
452*ed569bc2SAaron LI int i;
453*ed569bc2SAaron LI
454*ed569bc2SAaron LI n = 0;
455*ed569bc2SAaron LI for (i = 0; i < 8; i++) {
456*ed569bc2SAaron LI c = input();
457*ed569bc2SAaron LI if (!isxdigit(c) || c == 0)
458*ed569bc2SAaron LI break;
459*ed569bc2SAaron LI c = tolower(c);
460*ed569bc2SAaron LI n *= 16;
461*ed569bc2SAaron LI if (isdigit(c))
462*ed569bc2SAaron LI n += (c - '0');
463*ed569bc2SAaron LI else
464*ed569bc2SAaron LI n += 10 + (c - 'a');
465*ed569bc2SAaron LI }
466*ed569bc2SAaron LI unput(c);
467*ed569bc2SAaron LI bp += runetochar(bp, n);
468*ed569bc2SAaron LI break;
469*ed569bc2SAaron LI }
470*ed569bc2SAaron LI
4714b588458SPeter Avalos default:
4724b588458SPeter Avalos *bp++ = c;
4734b588458SPeter Avalos break;
4744b588458SPeter Avalos }
4754b588458SPeter Avalos break;
4764b588458SPeter Avalos default:
4774b588458SPeter Avalos *bp++ = c;
4784b588458SPeter Avalos break;
4794b588458SPeter Avalos }
4804b588458SPeter Avalos }
4814b588458SPeter Avalos *bp = 0;
4824b588458SPeter Avalos s = tostring(buf);
4831d48fce0SDaniel Fojt *bp++ = ' '; *bp++ = '\0';
4844b588458SPeter Avalos yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
4851d48fce0SDaniel Fojt free(s);
4864b588458SPeter Avalos RET(STRING);
4874b588458SPeter Avalos }
4884b588458SPeter Avalos
4894b588458SPeter Avalos
binsearch(char * w,const Keyword * kp,int n)4901d48fce0SDaniel Fojt static int binsearch(char *w, const Keyword *kp, int n)
4914b588458SPeter Avalos {
4924b588458SPeter Avalos int cond, low, mid, high;
4934b588458SPeter Avalos
4944b588458SPeter Avalos low = 0;
4954b588458SPeter Avalos high = n - 1;
4964b588458SPeter Avalos while (low <= high) {
4974b588458SPeter Avalos mid = (low + high) / 2;
4984b588458SPeter Avalos if ((cond = strcmp(w, kp[mid].word)) < 0)
4994b588458SPeter Avalos high = mid - 1;
5004b588458SPeter Avalos else if (cond > 0)
5014b588458SPeter Avalos low = mid + 1;
5024b588458SPeter Avalos else
5034b588458SPeter Avalos return mid;
5044b588458SPeter Avalos }
5054b588458SPeter Avalos return -1;
5064b588458SPeter Avalos }
5074b588458SPeter Avalos
word(char * w)5084b588458SPeter Avalos int word(char *w)
5094b588458SPeter Avalos {
5101d48fce0SDaniel Fojt const Keyword *kp;
5114b588458SPeter Avalos int c, n;
5124b588458SPeter Avalos
5134b588458SPeter Avalos n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
5144b588458SPeter Avalos if (n != -1) { /* found in table */
5151d48fce0SDaniel Fojt kp = keywords + n;
5164b588458SPeter Avalos yylval.i = kp->sub;
5174b588458SPeter Avalos switch (kp->type) { /* special handling */
5184b588458SPeter Avalos case BLTIN:
5194b588458SPeter Avalos if (kp->sub == FSYSTEM && safe)
5204b588458SPeter Avalos SYNTAX( "system is unsafe" );
5214b588458SPeter Avalos RET(kp->type);
5224b588458SPeter Avalos case FUNC:
5234b588458SPeter Avalos if (infunc)
5244b588458SPeter Avalos SYNTAX( "illegal nested function" );
5254b588458SPeter Avalos RET(kp->type);
5264b588458SPeter Avalos case RETURN:
5274b588458SPeter Avalos if (!infunc)
5284b588458SPeter Avalos SYNTAX( "return not in function" );
5294b588458SPeter Avalos RET(kp->type);
5304b588458SPeter Avalos case VARNF:
5314b588458SPeter Avalos yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
5324b588458SPeter Avalos RET(VARNF);
5334b588458SPeter Avalos default:
5344b588458SPeter Avalos RET(kp->type);
5354b588458SPeter Avalos }
5364b588458SPeter Avalos }
5374b588458SPeter Avalos c = peek(); /* look for '(' */
5384b588458SPeter Avalos if (c != '(' && infunc && (n=isarg(w)) >= 0) {
5394b588458SPeter Avalos yylval.i = n;
5404b588458SPeter Avalos RET(ARG);
5414b588458SPeter Avalos } else {
5424b588458SPeter Avalos yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
5434b588458SPeter Avalos if (c == '(') {
5444b588458SPeter Avalos RET(CALL);
5454b588458SPeter Avalos } else {
5464b588458SPeter Avalos RET(VAR);
5474b588458SPeter Avalos }
5484b588458SPeter Avalos }
5494b588458SPeter Avalos }
5504b588458SPeter Avalos
startreg(void)5514b588458SPeter Avalos void startreg(void) /* next call to yylex will return a regular expression */
5524b588458SPeter Avalos {
5531d48fce0SDaniel Fojt reg = true;
5544b588458SPeter Avalos }
5554b588458SPeter Avalos
regexpr(void)5564b588458SPeter Avalos int regexpr(void)
5574b588458SPeter Avalos {
5584b588458SPeter Avalos int c;
5591d48fce0SDaniel Fojt static char *buf = NULL;
5604b588458SPeter Avalos static int bufsz = 500;
5614b588458SPeter Avalos char *bp;
5624b588458SPeter Avalos
56348f09a05SAntonio Huete Jimenez if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL)
56448f09a05SAntonio Huete Jimenez FATAL("out of space for reg expr");
5654b588458SPeter Avalos bp = buf;
5664b588458SPeter Avalos for ( ; (c = input()) != '/' && c != 0; ) {
5674b588458SPeter Avalos if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
5684b588458SPeter Avalos FATAL("out of space for reg expr %.10s...", buf);
5694b588458SPeter Avalos if (c == '\n') {
5701d48fce0SDaniel Fojt *bp = '\0';
5714b588458SPeter Avalos SYNTAX( "newline in regular expression %.10s...", buf );
5724b588458SPeter Avalos unput('\n');
5734b588458SPeter Avalos break;
5744b588458SPeter Avalos } else if (c == '\\') {
5754b588458SPeter Avalos *bp++ = '\\';
5764b588458SPeter Avalos *bp++ = input();
5774b588458SPeter Avalos } else {
5784b588458SPeter Avalos *bp++ = c;
5794b588458SPeter Avalos }
5804b588458SPeter Avalos }
5814b588458SPeter Avalos *bp = 0;
5824b588458SPeter Avalos if (c == 0)
5834b588458SPeter Avalos SYNTAX("non-terminated regular expression %.10s...", buf);
584*ed569bc2SAaron LI yylval.s = tostring(buf);
5854b588458SPeter Avalos unput('/');
5864b588458SPeter Avalos RET(REGEXPR);
5874b588458SPeter Avalos }
5884b588458SPeter Avalos
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];		/* ring of characters read by input(); presumably error context -- TODO confirm */
char	*ep = ebuf;		/* next slot in ebuf; advanced by input(), backed up by unput() */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the most recently pushed-back character */
FILE	*yyin = NULL;		/* not referenced by the lexer functions in this file */
5964b588458SPeter Avalos
input(void)5974b588458SPeter Avalos int input(void) /* get next lexical input character */
5984b588458SPeter Avalos {
5994b588458SPeter Avalos int c;
6004b588458SPeter Avalos extern char *lexprog;
6014b588458SPeter Avalos
6024b588458SPeter Avalos if (yysptr > yysbuf)
6034b588458SPeter Avalos c = (uschar)*--yysptr;
6044b588458SPeter Avalos else if (lexprog != NULL) { /* awk '...' */
6054b588458SPeter Avalos if ((c = (uschar)*lexprog) != 0)
6064b588458SPeter Avalos lexprog++;
6074b588458SPeter Avalos } else /* awk -f ... */
6084b588458SPeter Avalos c = pgetc();
6091d48fce0SDaniel Fojt if (c == EOF)
6104b588458SPeter Avalos c = 0;
6114b588458SPeter Avalos if (ep >= ebuf + sizeof ebuf)
6124b588458SPeter Avalos ep = ebuf;
6131d48fce0SDaniel Fojt *ep = c;
6141d48fce0SDaniel Fojt if (c != 0) {
6151d48fce0SDaniel Fojt ep++;
6161d48fce0SDaniel Fojt }
6171d48fce0SDaniel Fojt return (c);
6184b588458SPeter Avalos }
6194b588458SPeter Avalos
unput(int c)6204b588458SPeter Avalos void unput(int c) /* put lexical character back on input */
6214b588458SPeter Avalos {
6224b588458SPeter Avalos if (c == '\n')
6234b588458SPeter Avalos lineno--;
6244b588458SPeter Avalos if (yysptr >= yysbuf + sizeof(yysbuf))
6254b588458SPeter Avalos FATAL("pushed back too much: %.20s...", yysbuf);
6264b588458SPeter Avalos *yysptr++ = c;
6274b588458SPeter Avalos if (--ep < ebuf)
6284b588458SPeter Avalos ep = ebuf + sizeof(ebuf) - 1;
6294b588458SPeter Avalos }
6304b588458SPeter Avalos
/*
 * unputstr - push the whole string s back onto the input.  Characters
 * are pushed last-to-first so they are read back in their original
 * order.  An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	size_t i;

	/* count down in size_t: no narrowing of strlen() to int */
	for (i = strlen(s); i > 0; i--)
		unput(s[i-1]);
}
638