19679Slinton /* Copyright (c) 1982 Regents of the University of California */ 29679Slinton 3*12120Slinton static char sccsid[] = "@(#)scanner.c 1.6 04/29/83"; 49679Slinton 59679Slinton /* 69679Slinton * Debugger scanner. 79679Slinton */ 89679Slinton 99679Slinton #include "defs.h" 109679Slinton #include "scanner.h" 119679Slinton #include "main.h" 129679Slinton #include "keywords.h" 139679Slinton #include "tree.h" 149679Slinton #include "symbols.h" 159679Slinton #include "names.h" 169679Slinton #include "y.tab.h" 179679Slinton 189679Slinton #ifndef public 199679Slinton typedef int Token; 209679Slinton #endif 219679Slinton 229679Slinton public String initfile = ".dbxinit"; 239679Slinton 249679Slinton typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 259679Slinton 269679Slinton private Charclass class[256 + 1]; 279679Slinton private Charclass *lexclass = class + 1; 289679Slinton 299679Slinton #define isdigit(c) (lexclass[c] == NUM) 309679Slinton #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 319679Slinton #define ishexdigit(c) ( \ 329679Slinton isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 339679Slinton ) 349679Slinton 359679Slinton #define MAXLINESIZE 1024 369679Slinton 379679Slinton private File in; 389679Slinton private Char linebuf[MAXLINESIZE]; 399679Slinton private Char *curchar; 409679Slinton 419679Slinton #define MAXINCLDEPTH 10 429679Slinton 439679Slinton private struct { 449679Slinton File savefile; 459679Slinton Filename savefn; 469679Slinton int savelineno; 479679Slinton } inclinfo[MAXINCLDEPTH]; 489679Slinton 499679Slinton private unsigned int curinclindex; 509679Slinton 519679Slinton private Token getident(); 529679Slinton private Token getnum(); 539679Slinton private Token getstring(); 549679Slinton private Boolean eofinput(); 559679Slinton private Char charcon(); 569679Slinton private Char charlookup(); 579679Slinton 589679Slinton private enterlexclass(class, s) 599679Slinton Charclass class; 609679Slinton String s; 619679Slinton { 629679Slinton register char *p; 639679Slinton 649679Slinton for (p = s; *p != '\0'; p++) { 659679Slinton lexclass[*p] = class; 669679Slinton } 679679Slinton } 689679Slinton 699679Slinton public scanner_init() 709679Slinton { 719679Slinton register Integer i; 729679Slinton 739679Slinton for (i = 0; i < 257; i++) { 749679Slinton class[i] = OTHER; 759679Slinton } 769679Slinton enterlexclass(WHITE, " \t"); 779679Slinton enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 789679Slinton enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 799679Slinton enterlexclass(NUM, "0123456789"); 809679Slinton in = stdin; 819679Slinton errfilename = nil; 829679Slinton errlineno = 0; 839679Slinton curchar = linebuf; 849679Slinton linebuf[0] = '\0'; 859679Slinton } 869679Slinton 879679Slinton /* 889679Slinton * Read a single token. 899679Slinton * 909679Slinton * Input is line buffered. 919679Slinton * 929679Slinton * There are two "modes" of operation: one as in a compiler, 939679Slinton * and one for reading shell-like syntax. 949679Slinton */ 959679Slinton 969679Slinton private Boolean shellmode; 979679Slinton 989679Slinton public Token yylex() 999679Slinton { 1009679Slinton register int c; 1019679Slinton register char *p; 1029679Slinton register Token t; 1039679Slinton String line; 1049679Slinton 1059679Slinton p = curchar; 1069679Slinton if (*p == '\0') { 1079679Slinton do { 1089679Slinton if (isterm(in)) { 10911767Slinton printf("> "); 11011767Slinton fflush(stdout); 1119679Slinton } 1129679Slinton line = fgets(linebuf, MAXLINESIZE, in); 1139679Slinton } while (line == nil and not eofinput()); 1149679Slinton if (line == nil) { 1159679Slinton c = EOF; 1169679Slinton } else { 1179679Slinton p = linebuf; 1189679Slinton while (lexclass[*p] == WHITE) { 1199679Slinton p++; 1209679Slinton } 1219679Slinton shellmode = false; 1229679Slinton } 1239679Slinton } else { 1249679Slinton while (lexclass[*p] == WHITE) { 1259679Slinton p++; 1269679Slinton } 1279679Slinton } 1289679Slinton curchar = p; 1299679Slinton c = *p; 1309679Slinton if (lexclass[c] == ALPHA) { 1319679Slinton t = getident(); 1329679Slinton } else if (lexclass[c] == NUM) { 133*12120Slinton if (shellmode) { 134*12120Slinton t = getident(); 135*12120Slinton } else { 136*12120Slinton t = getnum(); 137*12120Slinton } 1389679Slinton } else { 1399679Slinton ++curchar; 1409679Slinton switch (c) { 1419679Slinton case '\n': 1429679Slinton t = '\n'; 1439679Slinton if (errlineno != 0) { 1449679Slinton errlineno++; 1459679Slinton } 1469679Slinton break; 1479679Slinton 1489679Slinton case '"': 1499679Slinton case '\'': 1509679Slinton t = getstring(); 1519679Slinton break; 1529679Slinton 1539679Slinton case '.': 1549679Slinton if (shellmode) { 1559679Slinton --curchar; 1569679Slinton t = getident(); 1579679Slinton } else if (isdigit(*curchar)) { 1589679Slinton --curchar; 1599679Slinton t = getnum(); 1609679Slinton } else { 1619679Slinton t = '.'; 1629679Slinton } 1639679Slinton break; 1649679Slinton 1659679Slinton case '<': 1669679Slinton if (not shellmode and *curchar == '<') { 1679679Slinton ++curchar; 1689679Slinton t = LFORMER; 1699679Slinton } else { 1709679Slinton t = '<'; 1719679Slinton } 1729679Slinton break; 1739679Slinton 1749679Slinton case '>': 1759679Slinton if (not shellmode and *curchar == '>') { 1769679Slinton ++curchar; 1779679Slinton t = RFORMER; 1789679Slinton } else { 1799679Slinton t = '>'; 1809679Slinton } 1819679Slinton break; 1829679Slinton 1839679Slinton case '#': 1849679Slinton if (*curchar == '^') { 1859679Slinton ++curchar; 1869679Slinton t = ABSTRACTION; 1879679Slinton } else { 1889679Slinton t = '#'; 1899679Slinton } 1909679Slinton break; 1919679Slinton 1929679Slinton case '-': 1939679Slinton if (shellmode) { 1949679Slinton --curchar; 1959679Slinton t = getident(); 1969679Slinton } else if (*curchar == '>') { 1979679Slinton ++curchar; 1989679Slinton t = ARROW; 1999679Slinton } else { 2009679Slinton t = '-'; 2019679Slinton } 2029679Slinton break; 2039679Slinton 2049679Slinton case EOF: 2059679Slinton t = 0; 2069679Slinton break; 2079679Slinton 2089679Slinton default: 2099679Slinton if (shellmode and index("!&*()[]", c) == nil) { 2109679Slinton --curchar; 2119679Slinton t = getident(); 2129679Slinton } else { 2139679Slinton t = c; 2149679Slinton } 2159679Slinton break; 2169679Slinton } 2179679Slinton } 2189679Slinton # ifdef LEXDEBUG 2199679Slinton if (lexdebug) { 2209679Slinton fprintf(stderr, "yylex returns "); 2219679Slinton print_token(stderr, t); 2229679Slinton fprintf(stderr, "\n"); 2239679Slinton } 2249679Slinton # endif 2259679Slinton return t; 2269679Slinton } 2279679Slinton 2289679Slinton /* 2299679Slinton * Parser error handling. 2309679Slinton */ 2319679Slinton 2329679Slinton public yyerror(s) 2339679Slinton String s; 2349679Slinton { 2359679Slinton register Char *p, *tokenbegin, *tokenend; 2369679Slinton register Integer len; 2379679Slinton 2389679Slinton if (streq(s, "syntax error")) { 2399679Slinton beginerrmsg(); 2409679Slinton tokenend = curchar - 1; 2419679Slinton tokenbegin = tokenend; 2429679Slinton while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 2439679Slinton --tokenbegin; 2449679Slinton } 2459679Slinton len = tokenend - tokenbegin + 1; 2469679Slinton p = tokenbegin; 2479679Slinton if (p > &linebuf[0]) { 2489679Slinton while (lexclass[*p] == WHITE and p > &linebuf[0]) { 2499679Slinton --p; 2509679Slinton } 2519679Slinton } 2529679Slinton if (p == &linebuf[0]) { 2539679Slinton fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 2549679Slinton } else { 2559679Slinton fprintf(stderr, "syntax error"); 2569679Slinton if (len != 0) { 2579679Slinton fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 2589679Slinton } 2599679Slinton } 2609679Slinton enderrmsg(); 2619679Slinton } else { 2629679Slinton error(s); 2639679Slinton } 2649679Slinton } 2659679Slinton 2669679Slinton /* 2679679Slinton * Eat the current line. 2689679Slinton */ 2699679Slinton 2709679Slinton public gobble() 2719679Slinton { 2729679Slinton curchar = linebuf; 2739679Slinton linebuf[0] = '\0'; 2749679Slinton } 2759679Slinton 2769679Slinton /* 2779679Slinton * Scan an identifier and check to see if it's a keyword. 2789679Slinton */ 2799679Slinton 2809679Slinton private Token getident() 2819679Slinton { 2829679Slinton char buf[256]; 2839679Slinton register Char *p, *q; 2849679Slinton register Token t; 2859679Slinton 2869679Slinton p = curchar; 2879679Slinton q = buf; 2889679Slinton if (shellmode) { 2899679Slinton do { 2909679Slinton *q++ = *p++; 2919679Slinton } while (index(" \t\n!&<>*[]()", *p) == nil); 2929679Slinton } else { 2939679Slinton do { 2949679Slinton *q++ = *p++; 2959679Slinton } while (isalnum(*p)); 2969679Slinton } 2979679Slinton curchar = p; 2989679Slinton *q = '\0'; 2999679Slinton yylval.y_name = identname(buf, false); 3009679Slinton if (not shellmode) { 3019679Slinton t = findkeyword(yylval.y_name); 3029679Slinton if (t == nil) { 3039679Slinton t = NAME; 3049679Slinton } 3059679Slinton } else { 3069679Slinton t = NAME; 3079679Slinton } 3089679Slinton return t; 3099679Slinton } 3109679Slinton 3119679Slinton /* 3129679Slinton * Scan a number. 3139679Slinton */ 3149679Slinton 3159679Slinton private Token getnum() 3169679Slinton { 3179679Slinton char buf[256]; 3189679Slinton register Char *p, *q; 3199679Slinton register Token t; 3209679Slinton Integer base; 3219679Slinton 3229679Slinton p = curchar; 3239679Slinton q = buf; 3249679Slinton if (*p == '0') { 3259679Slinton if (*(p+1) == 'x') { 3269679Slinton p += 2; 3279679Slinton base = 16; 3289679Slinton } else { 3299679Slinton base = 8; 3309679Slinton } 3319679Slinton } else { 3329679Slinton base = 10; 3339679Slinton } 3349679Slinton if (base == 16) { 3359679Slinton do { 3369679Slinton *q++ = *p++; 3379679Slinton } while (ishexdigit(*p)); 3389679Slinton } else { 3399679Slinton do { 3409679Slinton *q++ = *p++; 3419679Slinton } while (isdigit(*p)); 3429679Slinton } 3439679Slinton if (*p == '.') { 3449679Slinton do { 3459679Slinton *q++ = *p++; 3469679Slinton } while (isdigit(*p)); 3479679Slinton if (*p == 'e' or *p == 'E') { 3489679Slinton p++; 3499679Slinton if (*p == '+' or *p == '-' or isdigit(*p)) { 3509679Slinton *q++ = 'e'; 3519679Slinton do { 3529679Slinton *q++ = *p++; 3539679Slinton } while (isdigit(*p)); 3549679Slinton } 3559679Slinton } 3569679Slinton *q = '\0'; 3579679Slinton yylval.y_real = atof(buf); 3589679Slinton t = REAL; 3599679Slinton } else { 3609679Slinton *q = '\0'; 3619679Slinton switch (base) { 3629679Slinton case 10: 3639679Slinton yylval.y_int = atol(buf); 3649679Slinton break; 3659679Slinton 3669679Slinton case 8: 3679679Slinton yylval.y_int = octal(buf); 3689679Slinton break; 3699679Slinton 3709679Slinton case 16: 3719679Slinton yylval.y_int = hex(buf); 3729679Slinton break; 3739679Slinton 3749679Slinton default: 3759679Slinton badcaseval(base); 3769679Slinton } 3779679Slinton t = INT; 3789679Slinton } 3799679Slinton curchar = p; 3809679Slinton return t; 3819679Slinton } 3829679Slinton 3839679Slinton /* 3849679Slinton * Convert a string of octal digits to an integer. 3859679Slinton */ 3869679Slinton 3879679Slinton private int octal(s) 3889679Slinton String s; 3899679Slinton { 3909679Slinton register Char *p; 3919679Slinton register Integer n; 3929679Slinton 3939679Slinton n = 0; 3949679Slinton for (p = s; *p != '\0'; p++) { 3959679Slinton n = 8*n + (*p - '0'); 3969679Slinton } 3979679Slinton return n; 3989679Slinton } 3999679Slinton 4009679Slinton /* 4019679Slinton * Convert a string of hexadecimal digits to an integer. 4029679Slinton */ 4039679Slinton 4049679Slinton private int hex(s) 4059679Slinton String s; 4069679Slinton { 4079679Slinton register Char *p; 4089679Slinton register Integer n; 4099679Slinton 4109679Slinton n = 0; 4119679Slinton for (p = s; *p != '\0'; p++) { 4129679Slinton n *= 16; 4139679Slinton if (*p >= 'a' and *p <= 'f') { 4149679Slinton n += (*p - 'a' + 10); 4159679Slinton } else if (*p >= 'A' and *p <= 'F') { 4169679Slinton n += (*p - 'A' + 10); 4179679Slinton } else { 4189679Slinton n += (*p - '0'); 4199679Slinton } 4209679Slinton } 4219679Slinton return n; 4229679Slinton } 4239679Slinton 4249679Slinton /* 4259679Slinton * Scan a string. 4269679Slinton */ 4279679Slinton 4289679Slinton private Token getstring() 4299679Slinton { 4309679Slinton char buf[256]; 4319679Slinton register Char *p, *q; 4329679Slinton Boolean endofstring; 4339679Slinton 4349679Slinton p = curchar; 4359679Slinton q = buf; 4369679Slinton endofstring = false; 4379679Slinton while (not endofstring) { 4389679Slinton if (*p == '\n' or *p == '\0') { 4399679Slinton error("non-terminated string"); 4409679Slinton endofstring = true; 44111122Slinton } else if (*p == '"' or *p == '\'') { 44211122Slinton if (*(p+1) != *p) { 4439679Slinton endofstring = true; 4449679Slinton } else { 4459679Slinton *q++ = *p; 4469679Slinton } 4479679Slinton } else { 44811559Slinton *q++ = charcon(p); 44911559Slinton p = curchar; 4509679Slinton } 4519679Slinton p++; 4529679Slinton } 4539679Slinton curchar = p; 4549679Slinton *q = '\0'; 4559679Slinton yylval.y_string = strdup(buf); 4569679Slinton return STRING; 4579679Slinton } 4589679Slinton 4599679Slinton /* 4609679Slinton * Process a character constant. 4619679Slinton * Watch out for backslashes. 4629679Slinton */ 4639679Slinton 46411559Slinton private Char charcon(p) 46511559Slinton char *p; 4669679Slinton { 46711559Slinton char c, buf[10], *q; 4689679Slinton 46911559Slinton if (*p == '\\') { 47011559Slinton ++p; 4719679Slinton if (*p != '\\') { 4729679Slinton q = buf; 4739679Slinton do { 4749679Slinton *q++ = *p++; 47511559Slinton } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 4769679Slinton *q = '\0'; 4779679Slinton if (isdigit(buf[0])) { 4789679Slinton c = (Char) octal(buf); 4799679Slinton } else { 4809679Slinton c = charlookup(buf); 4819679Slinton } 48211559Slinton curchar = p - 1; 4839679Slinton } else { 4849679Slinton c = '\\'; 4859679Slinton } 4869679Slinton } else { 48711559Slinton c = *p; 4889679Slinton } 4899679Slinton return c; 4909679Slinton } 4919679Slinton 4929679Slinton /* 4939679Slinton * Do a lookup for a ASCII character name. 4949679Slinton */ 4959679Slinton 4969679Slinton private String ascii[] = { 4979679Slinton "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 4989679Slinton "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 4999679Slinton "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 5009679Slinton "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 5019679Slinton "SP", nil 5029679Slinton }; 5039679Slinton 5049679Slinton private char charlookup(s) 5059679Slinton String s; 5069679Slinton { 5079679Slinton register int i; 5089679Slinton 5099679Slinton for (i = 0; ascii[i] != NULL; i++) { 5109679Slinton if (streq(s, ascii[i])) { 5119679Slinton return i; 5129679Slinton } 5139679Slinton } 5149679Slinton if (streq(s, "DEL")) { 5159679Slinton return 0177; 5169679Slinton } 5179679Slinton error("unknown ascii name \"%s\"", s); 5189679Slinton return '?'; 5199679Slinton } 5209679Slinton 5219679Slinton /* 5229679Slinton * Input file management routines. 5239679Slinton */ 5249679Slinton 5259679Slinton public setinput(filename) 5269679Slinton Filename filename; 5279679Slinton { 5289679Slinton File f; 5299679Slinton 5309679Slinton f = fopen(filename, "r"); 5319679Slinton if (f == nil) { 5329679Slinton error("can't open %s", filename); 5339679Slinton } else { 5349679Slinton if (curinclindex >= MAXINCLDEPTH) { 5359679Slinton error("unreasonable input nesting on \"%s\"", filename); 5369679Slinton } 5379679Slinton inclinfo[curinclindex].savefile = in; 5389679Slinton inclinfo[curinclindex].savefn = errfilename; 5399679Slinton inclinfo[curinclindex].savelineno = errlineno; 5409679Slinton curinclindex++; 5419679Slinton in = f; 5429679Slinton errfilename = filename; 5439679Slinton errlineno = 1; 5449679Slinton } 5459679Slinton } 5469679Slinton 5479679Slinton private Boolean eofinput() 5489679Slinton { 5499679Slinton register Boolean b; 5509679Slinton 5519679Slinton if (curinclindex == 0) { 5529679Slinton if (isterm(in)) { 5539679Slinton putchar('\n'); 5549679Slinton b = false; 5559679Slinton } else { 5569679Slinton b = true; 5579679Slinton } 5589679Slinton } else { 5599679Slinton fclose(in); 5609679Slinton --curinclindex; 5619679Slinton in = inclinfo[curinclindex].savefile; 5629679Slinton errfilename = inclinfo[curinclindex].savefn; 5639679Slinton errlineno = inclinfo[curinclindex].savelineno; 5649679Slinton b = false; 5659679Slinton } 5669679Slinton return b; 5679679Slinton } 5689679Slinton 5699679Slinton /* 5709679Slinton * Pop the current input. Return whether successful. 5719679Slinton */ 5729679Slinton 5739679Slinton public Boolean popinput() 5749679Slinton { 5759679Slinton Boolean b; 5769679Slinton 5779679Slinton if (curinclindex == 0) { 5789679Slinton b = false; 5799679Slinton } else { 5809679Slinton b = (Boolean) (not eofinput()); 5819679Slinton } 5829679Slinton return b; 5839679Slinton } 5849679Slinton 5859679Slinton /* 5869679Slinton * Return whether we are currently reading from standard input. 5879679Slinton */ 5889679Slinton 5899679Slinton public Boolean isstdin() 5909679Slinton { 5919679Slinton return (Boolean) (in == stdin); 5929679Slinton } 5939679Slinton 5949679Slinton /* 5959679Slinton * Send the current line to the shell. 5969679Slinton */ 5979679Slinton 5989679Slinton public shellline() 5999679Slinton { 6009679Slinton register char *p; 6019679Slinton 6029679Slinton p = curchar; 6039679Slinton while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 6049679Slinton ++p; 6059679Slinton } 6069679Slinton shell(p); 6079679Slinton if (*p == '\0' and isterm(in)) { 6089679Slinton putchar('\n'); 6099679Slinton } 6109679Slinton erecover(); 6119679Slinton } 6129679Slinton 6139679Slinton /* 6149679Slinton * Read the rest of the current line in "shell mode". 6159679Slinton */ 6169679Slinton 6179679Slinton public beginshellmode() 6189679Slinton { 6199679Slinton shellmode = true; 6209679Slinton } 6219679Slinton 6229679Slinton /* 6239679Slinton * Print out a token for debugging. 6249679Slinton */ 6259679Slinton 6269679Slinton public print_token(f, t) 6279679Slinton File f; 6289679Slinton Token t; 6299679Slinton { 6309679Slinton if (t == '\n') { 6319679Slinton fprintf(f, "char '\\n'"); 6329679Slinton } else if (t == EOF) { 6339679Slinton fprintf(f, "EOF"); 6349679Slinton } else if (t < 256) { 6359679Slinton fprintf(f, "char '%c'", t); 6369679Slinton } else { 6379679Slinton fprintf(f, "\"%s\"", keywdstring(t)); 6389679Slinton } 6399679Slinton } 640