19679Slinton /* Copyright (c) 1982 Regents of the University of California */ 29679Slinton 3*11122Slinton static char sccsid[] = "@(#)scanner.c 1.3 02/17/83"; 49679Slinton 59679Slinton /* 69679Slinton * Debugger scanner. 79679Slinton */ 89679Slinton 99679Slinton #include "defs.h" 109679Slinton #include "scanner.h" 119679Slinton #include "main.h" 129679Slinton #include "keywords.h" 139679Slinton #include "tree.h" 149679Slinton #include "symbols.h" 159679Slinton #include "names.h" 169679Slinton #include "y.tab.h" 179679Slinton 189679Slinton #ifndef public 199679Slinton typedef int Token; 209679Slinton #endif 219679Slinton 229679Slinton public String initfile = ".dbxinit"; 239679Slinton 249679Slinton typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 259679Slinton 269679Slinton private Charclass class[256 + 1]; 279679Slinton private Charclass *lexclass = class + 1; 289679Slinton 299679Slinton #define isdigit(c) (lexclass[c] == NUM) 309679Slinton #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 319679Slinton #define ishexdigit(c) ( \ 329679Slinton isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 339679Slinton ) 349679Slinton 359679Slinton #define MAXLINESIZE 1024 369679Slinton 379679Slinton private File in; 389679Slinton private Char linebuf[MAXLINESIZE]; 399679Slinton private Char *curchar; 409679Slinton 419679Slinton #define MAXINCLDEPTH 10 429679Slinton 439679Slinton private struct { 449679Slinton File savefile; 459679Slinton Filename savefn; 469679Slinton int savelineno; 479679Slinton } inclinfo[MAXINCLDEPTH]; 489679Slinton 499679Slinton private unsigned int curinclindex; 509679Slinton 519679Slinton private Boolean firsttoken = true; 529679Slinton private Boolean firstinit = true; 539679Slinton 549679Slinton private Token getident(); 559679Slinton private Token getnum(); 569679Slinton private Token getstring(); 579679Slinton private Boolean eofinput(); 589679Slinton private Char charcon(); 599679Slinton private Char charlookup(); 609679Slinton 619679Slinton private enterlexclass(class, s) 629679Slinton Charclass class; 639679Slinton String s; 649679Slinton { 659679Slinton register char *p; 669679Slinton 679679Slinton for (p = s; *p != '\0'; p++) { 689679Slinton lexclass[*p] = class; 699679Slinton } 709679Slinton } 719679Slinton 729679Slinton public scanner_init() 739679Slinton { 749679Slinton register Integer i; 759679Slinton 769679Slinton for (i = 0; i < 257; i++) { 779679Slinton class[i] = OTHER; 789679Slinton } 799679Slinton enterlexclass(WHITE, " \t"); 809679Slinton enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 819679Slinton enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 829679Slinton enterlexclass(NUM, "0123456789"); 839679Slinton in = stdin; 849679Slinton errfilename = nil; 859679Slinton errlineno = 0; 869679Slinton curchar = linebuf; 879679Slinton linebuf[0] = '\0'; 889679Slinton if (runfirst) { 899679Slinton firstinit = false; 909679Slinton firsttoken = false; 919679Slinton } else if (firstinit and isterm(in)) { 929679Slinton firstinit = false; 939679Slinton printf("> "); 949679Slinton fflush(stdout); 959679Slinton } 969679Slinton } 979679Slinton 989679Slinton /* 999679Slinton * Read a single token. 1009679Slinton * 1019679Slinton * Input is line buffered. 1029679Slinton * 1039679Slinton * There are two "modes" of operation: one as in a compiler, 1049679Slinton * and one for reading shell-like syntax. 1059679Slinton */ 1069679Slinton 1079679Slinton private Boolean shellmode; 1089679Slinton 1099679Slinton public Token yylex() 1109679Slinton { 1119679Slinton register int c; 1129679Slinton register char *p; 1139679Slinton register Token t; 1149679Slinton String line; 1159679Slinton 1169679Slinton p = curchar; 1179679Slinton if (*p == '\0') { 1189679Slinton do { 1199679Slinton if (isterm(in)) { 1209679Slinton if (firsttoken) { 1219679Slinton firsttoken = false; 1229679Slinton } else { 1239679Slinton printf("> "); 1249679Slinton fflush(stdout); 1259679Slinton } 1269679Slinton } 1279679Slinton line = fgets(linebuf, MAXLINESIZE, in); 1289679Slinton } while (line == nil and not eofinput()); 1299679Slinton if (line == nil) { 1309679Slinton c = EOF; 1319679Slinton } else { 1329679Slinton p = linebuf; 1339679Slinton while (lexclass[*p] == WHITE) { 1349679Slinton p++; 1359679Slinton } 1369679Slinton shellmode = false; 1379679Slinton } 1389679Slinton } else { 1399679Slinton while (lexclass[*p] == WHITE) { 1409679Slinton p++; 1419679Slinton } 1429679Slinton } 1439679Slinton curchar = p; 1449679Slinton c = *p; 1459679Slinton if (lexclass[c] == ALPHA) { 1469679Slinton t = getident(); 1479679Slinton } else if (lexclass[c] == NUM) { 1489679Slinton t = getnum(); 1499679Slinton } else { 1509679Slinton ++curchar; 1519679Slinton switch (c) { 1529679Slinton case '\n': 1539679Slinton t = '\n'; 1549679Slinton if (errlineno != 0) { 1559679Slinton errlineno++; 1569679Slinton } 1579679Slinton break; 1589679Slinton 1599679Slinton case '"': 1609679Slinton case '\'': 1619679Slinton t = getstring(); 1629679Slinton break; 1639679Slinton 1649679Slinton case '.': 1659679Slinton if (shellmode) { 1669679Slinton --curchar; 1679679Slinton t = getident(); 1689679Slinton } else if (isdigit(*curchar)) { 1699679Slinton --curchar; 1709679Slinton t = getnum(); 1719679Slinton } else { 1729679Slinton t = '.'; 1739679Slinton } 1749679Slinton break; 1759679Slinton 1769679Slinton case '<': 1779679Slinton if (not shellmode and *curchar == '<') { 1789679Slinton ++curchar; 1799679Slinton t = LFORMER; 1809679Slinton } else { 1819679Slinton t = '<'; 1829679Slinton } 1839679Slinton break; 1849679Slinton 1859679Slinton case '>': 1869679Slinton if (not shellmode and *curchar == '>') { 1879679Slinton ++curchar; 1889679Slinton t = RFORMER; 1899679Slinton } else { 1909679Slinton t = '>'; 1919679Slinton } 1929679Slinton break; 1939679Slinton 1949679Slinton case '#': 1959679Slinton if (*curchar == '^') { 1969679Slinton ++curchar; 1979679Slinton t = ABSTRACTION; 1989679Slinton } else { 1999679Slinton t = '#'; 2009679Slinton } 2019679Slinton break; 2029679Slinton 2039679Slinton case '-': 2049679Slinton if (shellmode) { 2059679Slinton --curchar; 2069679Slinton t = getident(); 2079679Slinton } else if (*curchar == '>') { 2089679Slinton ++curchar; 2099679Slinton t = ARROW; 2109679Slinton } else { 2119679Slinton t = '-'; 2129679Slinton } 2139679Slinton break; 2149679Slinton 2159679Slinton case EOF: 2169679Slinton t = 0; 2179679Slinton break; 2189679Slinton 2199679Slinton default: 2209679Slinton if (shellmode and index("!&*()[]", c) == nil) { 2219679Slinton --curchar; 2229679Slinton t = getident(); 2239679Slinton } else { 2249679Slinton t = c; 2259679Slinton } 2269679Slinton break; 2279679Slinton } 2289679Slinton } 2299679Slinton # ifdef LEXDEBUG 2309679Slinton if (lexdebug) { 2319679Slinton fprintf(stderr, "yylex returns "); 2329679Slinton print_token(stderr, t); 2339679Slinton fprintf(stderr, "\n"); 2349679Slinton } 2359679Slinton # endif 2369679Slinton return t; 2379679Slinton } 2389679Slinton 2399679Slinton /* 2409679Slinton * Parser error handling. 2419679Slinton */ 2429679Slinton 2439679Slinton public yyerror(s) 2449679Slinton String s; 2459679Slinton { 2469679Slinton register Char *p, *tokenbegin, *tokenend; 2479679Slinton register Integer len; 2489679Slinton 2499679Slinton if (streq(s, "syntax error")) { 2509679Slinton beginerrmsg(); 2519679Slinton tokenend = curchar - 1; 2529679Slinton tokenbegin = tokenend; 2539679Slinton while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 2549679Slinton --tokenbegin; 2559679Slinton } 2569679Slinton len = tokenend - tokenbegin + 1; 2579679Slinton p = tokenbegin; 2589679Slinton if (p > &linebuf[0]) { 2599679Slinton while (lexclass[*p] == WHITE and p > &linebuf[0]) { 2609679Slinton --p; 2619679Slinton } 2629679Slinton } 2639679Slinton if (p == &linebuf[0]) { 2649679Slinton fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 2659679Slinton } else { 2669679Slinton fprintf(stderr, "syntax error"); 2679679Slinton if (len != 0) { 2689679Slinton fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 2699679Slinton } 2709679Slinton } 2719679Slinton enderrmsg(); 2729679Slinton } else { 2739679Slinton error(s); 2749679Slinton } 2759679Slinton } 2769679Slinton 2779679Slinton /* 2789679Slinton * Eat the current line. 2799679Slinton */ 2809679Slinton 2819679Slinton public gobble() 2829679Slinton { 2839679Slinton curchar = linebuf; 2849679Slinton linebuf[0] = '\0'; 2859679Slinton } 2869679Slinton 2879679Slinton /* 2889679Slinton * Scan an identifier and check to see if it's a keyword. 2899679Slinton */ 2909679Slinton 2919679Slinton private Token getident() 2929679Slinton { 2939679Slinton char buf[256]; 2949679Slinton register Char *p, *q; 2959679Slinton register Token t; 2969679Slinton 2979679Slinton p = curchar; 2989679Slinton q = buf; 2999679Slinton if (shellmode) { 3009679Slinton do { 3019679Slinton *q++ = *p++; 3029679Slinton } while (index(" \t\n!&<>*[]()", *p) == nil); 3039679Slinton } else { 3049679Slinton do { 3059679Slinton *q++ = *p++; 3069679Slinton } while (isalnum(*p)); 3079679Slinton } 3089679Slinton curchar = p; 3099679Slinton *q = '\0'; 3109679Slinton yylval.y_name = identname(buf, false); 3119679Slinton if (not shellmode) { 3129679Slinton t = findkeyword(yylval.y_name); 3139679Slinton if (t == nil) { 3149679Slinton t = NAME; 3159679Slinton } 3169679Slinton } else { 3179679Slinton t = NAME; 3189679Slinton } 3199679Slinton return t; 3209679Slinton } 3219679Slinton 3229679Slinton /* 3239679Slinton * Scan a number. 3249679Slinton */ 3259679Slinton 3269679Slinton private Token getnum() 3279679Slinton { 3289679Slinton char buf[256]; 3299679Slinton register Char *p, *q; 3309679Slinton register Token t; 3319679Slinton Integer base; 3329679Slinton 3339679Slinton p = curchar; 3349679Slinton q = buf; 3359679Slinton if (*p == '0') { 3369679Slinton if (*(p+1) == 'x') { 3379679Slinton p += 2; 3389679Slinton base = 16; 3399679Slinton } else { 3409679Slinton base = 8; 3419679Slinton } 3429679Slinton } else { 3439679Slinton base = 10; 3449679Slinton } 3459679Slinton if (base == 16) { 3469679Slinton do { 3479679Slinton *q++ = *p++; 3489679Slinton } while (ishexdigit(*p)); 3499679Slinton } else { 3509679Slinton do { 3519679Slinton *q++ = *p++; 3529679Slinton } while (isdigit(*p)); 3539679Slinton } 3549679Slinton if (*p == '.') { 3559679Slinton do { 3569679Slinton *q++ = *p++; 3579679Slinton } while (isdigit(*p)); 3589679Slinton if (*p == 'e' or *p == 'E') { 3599679Slinton p++; 3609679Slinton if (*p == '+' or *p == '-' or isdigit(*p)) { 3619679Slinton *q++ = 'e'; 3629679Slinton do { 3639679Slinton *q++ = *p++; 3649679Slinton } while (isdigit(*p)); 3659679Slinton } 3669679Slinton } 3679679Slinton *q = '\0'; 3689679Slinton yylval.y_real = atof(buf); 3699679Slinton t = REAL; 3709679Slinton } else { 3719679Slinton *q = '\0'; 3729679Slinton switch (base) { 3739679Slinton case 10: 3749679Slinton yylval.y_int = atol(buf); 3759679Slinton break; 3769679Slinton 3779679Slinton case 8: 3789679Slinton yylval.y_int = octal(buf); 3799679Slinton break; 3809679Slinton 3819679Slinton case 16: 3829679Slinton yylval.y_int = hex(buf); 3839679Slinton break; 3849679Slinton 3859679Slinton default: 3869679Slinton badcaseval(base); 3879679Slinton } 3889679Slinton t = INT; 3899679Slinton } 3909679Slinton curchar = p; 3919679Slinton return t; 3929679Slinton } 3939679Slinton 3949679Slinton /* 3959679Slinton * Convert a string of octal digits to an integer. 3969679Slinton */ 3979679Slinton 3989679Slinton private int octal(s) 3999679Slinton String s; 4009679Slinton { 4019679Slinton register Char *p; 4029679Slinton register Integer n; 4039679Slinton 4049679Slinton n = 0; 4059679Slinton for (p = s; *p != '\0'; p++) { 4069679Slinton n = 8*n + (*p - '0'); 4079679Slinton } 4089679Slinton return n; 4099679Slinton } 4109679Slinton 4119679Slinton /* 4129679Slinton * Convert a string of hexadecimal digits to an integer. 4139679Slinton */ 4149679Slinton 4159679Slinton private int hex(s) 4169679Slinton String s; 4179679Slinton { 4189679Slinton register Char *p; 4199679Slinton register Integer n; 4209679Slinton 4219679Slinton n = 0; 4229679Slinton for (p = s; *p != '\0'; p++) { 4239679Slinton n *= 16; 4249679Slinton if (*p >= 'a' and *p <= 'f') { 4259679Slinton n += (*p - 'a' + 10); 4269679Slinton } else if (*p >= 'A' and *p <= 'F') { 4279679Slinton n += (*p - 'A' + 10); 4289679Slinton } else { 4299679Slinton n += (*p - '0'); 4309679Slinton } 4319679Slinton } 4329679Slinton return n; 4339679Slinton } 4349679Slinton 4359679Slinton /* 4369679Slinton * Scan a string. 4379679Slinton */ 4389679Slinton 4399679Slinton private Token getstring() 4409679Slinton { 4419679Slinton char buf[256]; 4429679Slinton register Char *p, *q; 4439679Slinton Boolean endofstring; 4449679Slinton 4459679Slinton p = curchar; 4469679Slinton q = buf; 4479679Slinton endofstring = false; 4489679Slinton while (not endofstring) { 4499679Slinton if (*p == '\n' or *p == '\0') { 4509679Slinton error("non-terminated string"); 4519679Slinton endofstring = true; 452*11122Slinton } else if (*p == '"' or *p == '\'') { 453*11122Slinton if (*(p+1) != *p) { 4549679Slinton endofstring = true; 4559679Slinton } else { 4569679Slinton *q++ = *p; 4579679Slinton } 4589679Slinton } else { 4599679Slinton *q++ = charcon(*p); 4609679Slinton } 4619679Slinton p++; 4629679Slinton } 4639679Slinton curchar = p; 4649679Slinton *q = '\0'; 4659679Slinton yylval.y_string = strdup(buf); 4669679Slinton return STRING; 4679679Slinton } 4689679Slinton 4699679Slinton /* 4709679Slinton * Process a character constant. 4719679Slinton * Watch out for backslashes. 4729679Slinton */ 4739679Slinton 4749679Slinton private Char charcon(ch) 4759679Slinton Char ch; 4769679Slinton { 4779679Slinton Char c, buf[10], *p, *q; 4789679Slinton 4799679Slinton p = curchar; 4809679Slinton if (ch == '\\') { 4819679Slinton if (*p != '\\') { 4829679Slinton q = buf; 4839679Slinton do { 4849679Slinton *q++ = *p++; 4859679Slinton } while (*p != '\\' and *p != '\n' and *p != '\0'); 4869679Slinton if (*p != '\\') { 4879679Slinton ungetc(*p, in); 4889679Slinton error("non-terminated character constant"); 4899679Slinton } 4909679Slinton *q = '\0'; 4919679Slinton if (isdigit(buf[0])) { 4929679Slinton c = (Char) octal(buf); 4939679Slinton } else { 4949679Slinton c = charlookup(buf); 4959679Slinton } 4969679Slinton curchar = p; 4979679Slinton } else { 4989679Slinton c = '\\'; 4999679Slinton } 5009679Slinton } else { 5019679Slinton c = ch; 5029679Slinton } 5039679Slinton return c; 5049679Slinton } 5059679Slinton 5069679Slinton /* 5079679Slinton * Do a lookup for a ASCII character name. 5089679Slinton */ 5099679Slinton 5109679Slinton private String ascii[] = { 5119679Slinton "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 5129679Slinton "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 5139679Slinton "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 5149679Slinton "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 5159679Slinton "SP", nil 5169679Slinton }; 5179679Slinton 5189679Slinton private char charlookup(s) 5199679Slinton String s; 5209679Slinton { 5219679Slinton register int i; 5229679Slinton 5239679Slinton for (i = 0; ascii[i] != NULL; i++) { 5249679Slinton if (streq(s, ascii[i])) { 5259679Slinton return i; 5269679Slinton } 5279679Slinton } 5289679Slinton if (streq(s, "DEL")) { 5299679Slinton return 0177; 5309679Slinton } 5319679Slinton error("unknown ascii name \"%s\"", s); 5329679Slinton return '?'; 5339679Slinton } 5349679Slinton 5359679Slinton /* 5369679Slinton * Input file management routines. 5379679Slinton */ 5389679Slinton 5399679Slinton public setinput(filename) 5409679Slinton Filename filename; 5419679Slinton { 5429679Slinton File f; 5439679Slinton 5449679Slinton f = fopen(filename, "r"); 5459679Slinton if (f == nil) { 5469679Slinton error("can't open %s", filename); 5479679Slinton } else { 5489679Slinton if (curinclindex >= MAXINCLDEPTH) { 5499679Slinton error("unreasonable input nesting on \"%s\"", filename); 5509679Slinton } 5519679Slinton inclinfo[curinclindex].savefile = in; 5529679Slinton inclinfo[curinclindex].savefn = errfilename; 5539679Slinton inclinfo[curinclindex].savelineno = errlineno; 5549679Slinton curinclindex++; 5559679Slinton in = f; 5569679Slinton errfilename = filename; 5579679Slinton errlineno = 1; 5589679Slinton } 5599679Slinton } 5609679Slinton 5619679Slinton private Boolean eofinput() 5629679Slinton { 5639679Slinton register Boolean b; 5649679Slinton 5659679Slinton if (curinclindex == 0) { 5669679Slinton if (isterm(in)) { 5679679Slinton putchar('\n'); 5689679Slinton b = false; 5699679Slinton } else { 5709679Slinton b = true; 5719679Slinton } 5729679Slinton } else { 5739679Slinton fclose(in); 5749679Slinton --curinclindex; 5759679Slinton in = inclinfo[curinclindex].savefile; 5769679Slinton errfilename = inclinfo[curinclindex].savefn; 5779679Slinton errlineno = inclinfo[curinclindex].savelineno; 5789679Slinton b = false; 5799679Slinton } 5809679Slinton return b; 5819679Slinton } 5829679Slinton 5839679Slinton /* 5849679Slinton * Pop the current input. Return whether successful. 5859679Slinton */ 5869679Slinton 5879679Slinton public Boolean popinput() 5889679Slinton { 5899679Slinton Boolean b; 5909679Slinton 5919679Slinton if (curinclindex == 0) { 5929679Slinton b = false; 5939679Slinton } else { 5949679Slinton b = (Boolean) (not eofinput()); 5959679Slinton } 5969679Slinton return b; 5979679Slinton } 5989679Slinton 5999679Slinton /* 6009679Slinton * Return whether we are currently reading from standard input. 6019679Slinton */ 6029679Slinton 6039679Slinton public Boolean isstdin() 6049679Slinton { 6059679Slinton return (Boolean) (in == stdin); 6069679Slinton } 6079679Slinton 6089679Slinton /* 6099679Slinton * Send the current line to the shell. 6109679Slinton */ 6119679Slinton 6129679Slinton public shellline() 6139679Slinton { 6149679Slinton register char *p; 6159679Slinton 6169679Slinton p = curchar; 6179679Slinton while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 6189679Slinton ++p; 6199679Slinton } 6209679Slinton shell(p); 6219679Slinton if (*p == '\0' and isterm(in)) { 6229679Slinton putchar('\n'); 6239679Slinton } 6249679Slinton erecover(); 6259679Slinton } 6269679Slinton 6279679Slinton /* 6289679Slinton * Read the rest of the current line in "shell mode". 6299679Slinton */ 6309679Slinton 6319679Slinton public beginshellmode() 6329679Slinton { 6339679Slinton shellmode = true; 6349679Slinton } 6359679Slinton 6369679Slinton /* 6379679Slinton * Print out a token for debugging. 6389679Slinton */ 6399679Slinton 6409679Slinton public print_token(f, t) 6419679Slinton File f; 6429679Slinton Token t; 6439679Slinton { 6449679Slinton if (t == '\n') { 6459679Slinton fprintf(f, "char '\\n'"); 6469679Slinton } else if (t == EOF) { 6479679Slinton fprintf(f, "EOF"); 6489679Slinton } else if (t < 256) { 6499679Slinton fprintf(f, "char '%c'", t); 6509679Slinton } else { 6519679Slinton fprintf(f, "\"%s\"", keywdstring(t)); 6529679Slinton } 6539679Slinton } 654