19679Slinton /* Copyright (c) 1982 Regents of the University of California */ 29679Slinton 3*11559Slinton static char sccsid[] = "@(#)scanner.c 1.4 03/13/83"; 49679Slinton 59679Slinton /* 69679Slinton * Debugger scanner. 79679Slinton */ 89679Slinton 99679Slinton #include "defs.h" 109679Slinton #include "scanner.h" 119679Slinton #include "main.h" 129679Slinton #include "keywords.h" 139679Slinton #include "tree.h" 149679Slinton #include "symbols.h" 159679Slinton #include "names.h" 169679Slinton #include "y.tab.h" 179679Slinton 189679Slinton #ifndef public 199679Slinton typedef int Token; 209679Slinton #endif 219679Slinton 229679Slinton public String initfile = ".dbxinit"; 239679Slinton 249679Slinton typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 259679Slinton 269679Slinton private Charclass class[256 + 1]; 279679Slinton private Charclass *lexclass = class + 1; 289679Slinton 299679Slinton #define isdigit(c) (lexclass[c] == NUM) 309679Slinton #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 319679Slinton #define ishexdigit(c) ( \ 329679Slinton isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 339679Slinton ) 349679Slinton 359679Slinton #define MAXLINESIZE 1024 369679Slinton 379679Slinton private File in; 389679Slinton private Char linebuf[MAXLINESIZE]; 399679Slinton private Char *curchar; 409679Slinton 419679Slinton #define MAXINCLDEPTH 10 429679Slinton 439679Slinton private struct { 449679Slinton File savefile; 459679Slinton Filename savefn; 469679Slinton int savelineno; 479679Slinton } inclinfo[MAXINCLDEPTH]; 489679Slinton 499679Slinton private unsigned int curinclindex; 509679Slinton 519679Slinton private Boolean firsttoken = true; 529679Slinton private Boolean firstinit = true; 539679Slinton 549679Slinton private Token getident(); 559679Slinton private Token getnum(); 569679Slinton private Token getstring(); 579679Slinton private Boolean eofinput(); 589679Slinton private Char charcon(); 599679Slinton private Char charlookup(); 609679Slinton 619679Slinton private enterlexclass(class, s) 629679Slinton Charclass class; 639679Slinton String s; 649679Slinton { 659679Slinton register char *p; 669679Slinton 679679Slinton for (p = s; *p != '\0'; p++) { 689679Slinton lexclass[*p] = class; 699679Slinton } 709679Slinton } 719679Slinton 729679Slinton public scanner_init() 739679Slinton { 749679Slinton register Integer i; 759679Slinton 769679Slinton for (i = 0; i < 257; i++) { 779679Slinton class[i] = OTHER; 789679Slinton } 799679Slinton enterlexclass(WHITE, " \t"); 809679Slinton enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 819679Slinton enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 829679Slinton enterlexclass(NUM, "0123456789"); 839679Slinton in = stdin; 849679Slinton errfilename = nil; 859679Slinton errlineno = 0; 869679Slinton curchar = linebuf; 879679Slinton linebuf[0] = '\0'; 889679Slinton if (runfirst) { 899679Slinton firstinit = false; 909679Slinton firsttoken = false; 919679Slinton } else if (firstinit and isterm(in)) { 929679Slinton firstinit = false; 939679Slinton printf("> "); 949679Slinton fflush(stdout); 959679Slinton } 969679Slinton } 979679Slinton 989679Slinton /* 999679Slinton * Read a single token. 1009679Slinton * 1019679Slinton * Input is line buffered. 1029679Slinton * 1039679Slinton * There are two "modes" of operation: one as in a compiler, 1049679Slinton * and one for reading shell-like syntax. 1059679Slinton */ 1069679Slinton 1079679Slinton private Boolean shellmode; 1089679Slinton 1099679Slinton public Token yylex() 1109679Slinton { 1119679Slinton register int c; 1129679Slinton register char *p; 1139679Slinton register Token t; 1149679Slinton String line; 1159679Slinton 1169679Slinton p = curchar; 1179679Slinton if (*p == '\0') { 1189679Slinton do { 1199679Slinton if (isterm(in)) { 1209679Slinton if (firsttoken) { 1219679Slinton firsttoken = false; 1229679Slinton } else { 1239679Slinton printf("> "); 1249679Slinton fflush(stdout); 1259679Slinton } 1269679Slinton } 1279679Slinton line = fgets(linebuf, MAXLINESIZE, in); 1289679Slinton } while (line == nil and not eofinput()); 1299679Slinton if (line == nil) { 1309679Slinton c = EOF; 1319679Slinton } else { 1329679Slinton p = linebuf; 1339679Slinton while (lexclass[*p] == WHITE) { 1349679Slinton p++; 1359679Slinton } 1369679Slinton shellmode = false; 1379679Slinton } 1389679Slinton } else { 1399679Slinton while (lexclass[*p] == WHITE) { 1409679Slinton p++; 1419679Slinton } 1429679Slinton } 1439679Slinton curchar = p; 1449679Slinton c = *p; 1459679Slinton if (lexclass[c] == ALPHA) { 1469679Slinton t = getident(); 1479679Slinton } else if (lexclass[c] == NUM) { 1489679Slinton t = getnum(); 1499679Slinton } else { 1509679Slinton ++curchar; 1519679Slinton switch (c) { 1529679Slinton case '\n': 1539679Slinton t = '\n'; 1549679Slinton if (errlineno != 0) { 1559679Slinton errlineno++; 1569679Slinton } 1579679Slinton break; 1589679Slinton 1599679Slinton case '"': 1609679Slinton case '\'': 1619679Slinton t = getstring(); 1629679Slinton break; 1639679Slinton 1649679Slinton case '.': 1659679Slinton if (shellmode) { 1669679Slinton --curchar; 1679679Slinton t = getident(); 1689679Slinton } else if (isdigit(*curchar)) { 1699679Slinton --curchar; 1709679Slinton t = getnum(); 1719679Slinton } else { 1729679Slinton t = '.'; 1739679Slinton } 1749679Slinton break; 1759679Slinton 1769679Slinton case '<': 1779679Slinton if (not shellmode and *curchar == '<') { 1789679Slinton ++curchar; 1799679Slinton t = LFORMER; 1809679Slinton } else { 1819679Slinton t = '<'; 1829679Slinton } 1839679Slinton break; 1849679Slinton 1859679Slinton case '>': 1869679Slinton if (not shellmode and *curchar == '>') { 1879679Slinton ++curchar; 1889679Slinton t = RFORMER; 1899679Slinton } else { 1909679Slinton t = '>'; 1919679Slinton } 1929679Slinton break; 1939679Slinton 1949679Slinton case '#': 1959679Slinton if (*curchar == '^') { 1969679Slinton ++curchar; 1979679Slinton t = ABSTRACTION; 1989679Slinton } else { 1999679Slinton t = '#'; 2009679Slinton } 2019679Slinton break; 2029679Slinton 2039679Slinton case '-': 2049679Slinton if (shellmode) { 2059679Slinton --curchar; 2069679Slinton t = getident(); 2079679Slinton } else if (*curchar == '>') { 2089679Slinton ++curchar; 2099679Slinton t = ARROW; 2109679Slinton } else { 2119679Slinton t = '-'; 2129679Slinton } 2139679Slinton break; 2149679Slinton 2159679Slinton case EOF: 2169679Slinton t = 0; 2179679Slinton break; 2189679Slinton 2199679Slinton default: 2209679Slinton if (shellmode and index("!&*()[]", c) == nil) { 2219679Slinton --curchar; 2229679Slinton t = getident(); 2239679Slinton } else { 2249679Slinton t = c; 2259679Slinton } 2269679Slinton break; 2279679Slinton } 2289679Slinton } 2299679Slinton # ifdef LEXDEBUG 2309679Slinton if (lexdebug) { 2319679Slinton fprintf(stderr, "yylex returns "); 2329679Slinton print_token(stderr, t); 2339679Slinton fprintf(stderr, "\n"); 2349679Slinton } 2359679Slinton # endif 2369679Slinton return t; 2379679Slinton } 2389679Slinton 2399679Slinton /* 2409679Slinton * Parser error handling. 2419679Slinton */ 2429679Slinton 2439679Slinton public yyerror(s) 2449679Slinton String s; 2459679Slinton { 2469679Slinton register Char *p, *tokenbegin, *tokenend; 2479679Slinton register Integer len; 2489679Slinton 2499679Slinton if (streq(s, "syntax error")) { 2509679Slinton beginerrmsg(); 2519679Slinton tokenend = curchar - 1; 2529679Slinton tokenbegin = tokenend; 2539679Slinton while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 2549679Slinton --tokenbegin; 2559679Slinton } 2569679Slinton len = tokenend - tokenbegin + 1; 2579679Slinton p = tokenbegin; 2589679Slinton if (p > &linebuf[0]) { 2599679Slinton while (lexclass[*p] == WHITE and p > &linebuf[0]) { 2609679Slinton --p; 2619679Slinton } 2629679Slinton } 2639679Slinton if (p == &linebuf[0]) { 2649679Slinton fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 2659679Slinton } else { 2669679Slinton fprintf(stderr, "syntax error"); 2679679Slinton if (len != 0) { 2689679Slinton fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 2699679Slinton } 2709679Slinton } 2719679Slinton enderrmsg(); 2729679Slinton } else { 2739679Slinton error(s); 2749679Slinton } 2759679Slinton } 2769679Slinton 2779679Slinton /* 2789679Slinton * Eat the current line. 2799679Slinton */ 2809679Slinton 2819679Slinton public gobble() 2829679Slinton { 2839679Slinton curchar = linebuf; 2849679Slinton linebuf[0] = '\0'; 2859679Slinton } 2869679Slinton 2879679Slinton /* 2889679Slinton * Scan an identifier and check to see if it's a keyword. 2899679Slinton */ 2909679Slinton 2919679Slinton private Token getident() 2929679Slinton { 2939679Slinton char buf[256]; 2949679Slinton register Char *p, *q; 2959679Slinton register Token t; 2969679Slinton 2979679Slinton p = curchar; 2989679Slinton q = buf; 2999679Slinton if (shellmode) { 3009679Slinton do { 3019679Slinton *q++ = *p++; 3029679Slinton } while (index(" \t\n!&<>*[]()", *p) == nil); 3039679Slinton } else { 3049679Slinton do { 3059679Slinton *q++ = *p++; 3069679Slinton } while (isalnum(*p)); 3079679Slinton } 3089679Slinton curchar = p; 3099679Slinton *q = '\0'; 3109679Slinton yylval.y_name = identname(buf, false); 3119679Slinton if (not shellmode) { 3129679Slinton t = findkeyword(yylval.y_name); 3139679Slinton if (t == nil) { 3149679Slinton t = NAME; 3159679Slinton } 3169679Slinton } else { 3179679Slinton t = NAME; 3189679Slinton } 3199679Slinton return t; 3209679Slinton } 3219679Slinton 3229679Slinton /* 3239679Slinton * Scan a number. 3249679Slinton */ 3259679Slinton 3269679Slinton private Token getnum() 3279679Slinton { 3289679Slinton char buf[256]; 3299679Slinton register Char *p, *q; 3309679Slinton register Token t; 3319679Slinton Integer base; 3329679Slinton 3339679Slinton p = curchar; 3349679Slinton q = buf; 3359679Slinton if (*p == '0') { 3369679Slinton if (*(p+1) == 'x') { 3379679Slinton p += 2; 3389679Slinton base = 16; 3399679Slinton } else { 3409679Slinton base = 8; 3419679Slinton } 3429679Slinton } else { 3439679Slinton base = 10; 3449679Slinton } 3459679Slinton if (base == 16) { 3469679Slinton do { 3479679Slinton *q++ = *p++; 3489679Slinton } while (ishexdigit(*p)); 3499679Slinton } else { 3509679Slinton do { 3519679Slinton *q++ = *p++; 3529679Slinton } while (isdigit(*p)); 3539679Slinton } 3549679Slinton if (*p == '.') { 3559679Slinton do { 3569679Slinton *q++ = *p++; 3579679Slinton } while (isdigit(*p)); 3589679Slinton if (*p == 'e' or *p == 'E') { 3599679Slinton p++; 3609679Slinton if (*p == '+' or *p == '-' or isdigit(*p)) { 3619679Slinton *q++ = 'e'; 3629679Slinton do { 3639679Slinton *q++ = *p++; 3649679Slinton } while (isdigit(*p)); 3659679Slinton } 3669679Slinton } 3679679Slinton *q = '\0'; 3689679Slinton yylval.y_real = atof(buf); 3699679Slinton t = REAL; 3709679Slinton } else { 3719679Slinton *q = '\0'; 3729679Slinton switch (base) { 3739679Slinton case 10: 3749679Slinton yylval.y_int = atol(buf); 3759679Slinton break; 3769679Slinton 3779679Slinton case 8: 3789679Slinton yylval.y_int = octal(buf); 3799679Slinton break; 3809679Slinton 3819679Slinton case 16: 3829679Slinton yylval.y_int = hex(buf); 3839679Slinton break; 3849679Slinton 3859679Slinton default: 3869679Slinton badcaseval(base); 3879679Slinton } 3889679Slinton t = INT; 3899679Slinton } 3909679Slinton curchar = p; 3919679Slinton return t; 3929679Slinton } 3939679Slinton 3949679Slinton /* 3959679Slinton * Convert a string of octal digits to an integer. 3969679Slinton */ 3979679Slinton 3989679Slinton private int octal(s) 3999679Slinton String s; 4009679Slinton { 4019679Slinton register Char *p; 4029679Slinton register Integer n; 4039679Slinton 4049679Slinton n = 0; 4059679Slinton for (p = s; *p != '\0'; p++) { 4069679Slinton n = 8*n + (*p - '0'); 4079679Slinton } 4089679Slinton return n; 4099679Slinton } 4109679Slinton 4119679Slinton /* 4129679Slinton * Convert a string of hexadecimal digits to an integer. 4139679Slinton */ 4149679Slinton 4159679Slinton private int hex(s) 4169679Slinton String s; 4179679Slinton { 4189679Slinton register Char *p; 4199679Slinton register Integer n; 4209679Slinton 4219679Slinton n = 0; 4229679Slinton for (p = s; *p != '\0'; p++) { 4239679Slinton n *= 16; 4249679Slinton if (*p >= 'a' and *p <= 'f') { 4259679Slinton n += (*p - 'a' + 10); 4269679Slinton } else if (*p >= 'A' and *p <= 'F') { 4279679Slinton n += (*p - 'A' + 10); 4289679Slinton } else { 4299679Slinton n += (*p - '0'); 4309679Slinton } 4319679Slinton } 4329679Slinton return n; 4339679Slinton } 4349679Slinton 4359679Slinton /* 4369679Slinton * Scan a string. 4379679Slinton */ 4389679Slinton 4399679Slinton private Token getstring() 4409679Slinton { 4419679Slinton char buf[256]; 4429679Slinton register Char *p, *q; 4439679Slinton Boolean endofstring; 4449679Slinton 4459679Slinton p = curchar; 4469679Slinton q = buf; 4479679Slinton endofstring = false; 4489679Slinton while (not endofstring) { 4499679Slinton if (*p == '\n' or *p == '\0') { 4509679Slinton error("non-terminated string"); 4519679Slinton endofstring = true; 45211122Slinton } else if (*p == '"' or *p == '\'') { 45311122Slinton if (*(p+1) != *p) { 4549679Slinton endofstring = true; 4559679Slinton } else { 4569679Slinton *q++ = *p; 4579679Slinton } 4589679Slinton } else { 459*11559Slinton *q++ = charcon(p); 460*11559Slinton p = curchar; 4619679Slinton } 4629679Slinton p++; 4639679Slinton } 4649679Slinton curchar = p; 4659679Slinton *q = '\0'; 4669679Slinton yylval.y_string = strdup(buf); 4679679Slinton return STRING; 4689679Slinton } 4699679Slinton 4709679Slinton /* 4719679Slinton * Process a character constant. 4729679Slinton * Watch out for backslashes. 4739679Slinton */ 4749679Slinton 475*11559Slinton private Char charcon(p) 476*11559Slinton char *p; 4779679Slinton { 478*11559Slinton char c, buf[10], *q; 4799679Slinton 480*11559Slinton if (*p == '\\') { 481*11559Slinton ++p; 4829679Slinton if (*p != '\\') { 4839679Slinton q = buf; 4849679Slinton do { 4859679Slinton *q++ = *p++; 486*11559Slinton } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 4879679Slinton *q = '\0'; 4889679Slinton if (isdigit(buf[0])) { 4899679Slinton c = (Char) octal(buf); 4909679Slinton } else { 4919679Slinton c = charlookup(buf); 4929679Slinton } 493*11559Slinton curchar = p - 1; 4949679Slinton } else { 4959679Slinton c = '\\'; 4969679Slinton } 4979679Slinton } else { 498*11559Slinton c = *p; 4999679Slinton } 5009679Slinton return c; 5019679Slinton } 5029679Slinton 5039679Slinton /* 5049679Slinton * Do a lookup for a ASCII character name. 5059679Slinton */ 5069679Slinton 5079679Slinton private String ascii[] = { 5089679Slinton "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 5099679Slinton "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 5109679Slinton "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 5119679Slinton "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 5129679Slinton "SP", nil 5139679Slinton }; 5149679Slinton 5159679Slinton private char charlookup(s) 5169679Slinton String s; 5179679Slinton { 5189679Slinton register int i; 5199679Slinton 5209679Slinton for (i = 0; ascii[i] != NULL; i++) { 5219679Slinton if (streq(s, ascii[i])) { 5229679Slinton return i; 5239679Slinton } 5249679Slinton } 5259679Slinton if (streq(s, "DEL")) { 5269679Slinton return 0177; 5279679Slinton } 5289679Slinton error("unknown ascii name \"%s\"", s); 5299679Slinton return '?'; 5309679Slinton } 5319679Slinton 5329679Slinton /* 5339679Slinton * Input file management routines. 5349679Slinton */ 5359679Slinton 5369679Slinton public setinput(filename) 5379679Slinton Filename filename; 5389679Slinton { 5399679Slinton File f; 5409679Slinton 5419679Slinton f = fopen(filename, "r"); 5429679Slinton if (f == nil) { 5439679Slinton error("can't open %s", filename); 5449679Slinton } else { 5459679Slinton if (curinclindex >= MAXINCLDEPTH) { 5469679Slinton error("unreasonable input nesting on \"%s\"", filename); 5479679Slinton } 5489679Slinton inclinfo[curinclindex].savefile = in; 5499679Slinton inclinfo[curinclindex].savefn = errfilename; 5509679Slinton inclinfo[curinclindex].savelineno = errlineno; 5519679Slinton curinclindex++; 5529679Slinton in = f; 5539679Slinton errfilename = filename; 5549679Slinton errlineno = 1; 5559679Slinton } 5569679Slinton } 5579679Slinton 5589679Slinton private Boolean eofinput() 5599679Slinton { 5609679Slinton register Boolean b; 5619679Slinton 5629679Slinton if (curinclindex == 0) { 5639679Slinton if (isterm(in)) { 5649679Slinton putchar('\n'); 5659679Slinton b = false; 5669679Slinton } else { 5679679Slinton b = true; 5689679Slinton } 5699679Slinton } else { 5709679Slinton fclose(in); 5719679Slinton --curinclindex; 5729679Slinton in = inclinfo[curinclindex].savefile; 5739679Slinton errfilename = inclinfo[curinclindex].savefn; 5749679Slinton errlineno = inclinfo[curinclindex].savelineno; 5759679Slinton b = false; 5769679Slinton } 5779679Slinton return b; 5789679Slinton } 5799679Slinton 5809679Slinton /* 5819679Slinton * Pop the current input. Return whether successful. 5829679Slinton */ 5839679Slinton 5849679Slinton public Boolean popinput() 5859679Slinton { 5869679Slinton Boolean b; 5879679Slinton 5889679Slinton if (curinclindex == 0) { 5899679Slinton b = false; 5909679Slinton } else { 5919679Slinton b = (Boolean) (not eofinput()); 5929679Slinton } 5939679Slinton return b; 5949679Slinton } 5959679Slinton 5969679Slinton /* 5979679Slinton * Return whether we are currently reading from standard input. 5989679Slinton */ 5999679Slinton 6009679Slinton public Boolean isstdin() 6019679Slinton { 6029679Slinton return (Boolean) (in == stdin); 6039679Slinton } 6049679Slinton 6059679Slinton /* 6069679Slinton * Send the current line to the shell. 6079679Slinton */ 6089679Slinton 6099679Slinton public shellline() 6109679Slinton { 6119679Slinton register char *p; 6129679Slinton 6139679Slinton p = curchar; 6149679Slinton while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 6159679Slinton ++p; 6169679Slinton } 6179679Slinton shell(p); 6189679Slinton if (*p == '\0' and isterm(in)) { 6199679Slinton putchar('\n'); 6209679Slinton } 6219679Slinton erecover(); 6229679Slinton } 6239679Slinton 6249679Slinton /* 6259679Slinton * Read the rest of the current line in "shell mode". 6269679Slinton */ 6279679Slinton 6289679Slinton public beginshellmode() 6299679Slinton { 6309679Slinton shellmode = true; 6319679Slinton } 6329679Slinton 6339679Slinton /* 6349679Slinton * Print out a token for debugging. 6359679Slinton */ 6369679Slinton 6379679Slinton public print_token(f, t) 6389679Slinton File f; 6399679Slinton Token t; 6409679Slinton { 6419679Slinton if (t == '\n') { 6429679Slinton fprintf(f, "char '\\n'"); 6439679Slinton } else if (t == EOF) { 6449679Slinton fprintf(f, "EOF"); 6459679Slinton } else if (t < 256) { 6469679Slinton fprintf(f, "char '%c'", t); 6479679Slinton } else { 6489679Slinton fprintf(f, "\"%s\"", keywdstring(t)); 6499679Slinton } 6509679Slinton } 651