19679Slinton /* Copyright (c) 1982 Regents of the University of California */ 29679Slinton 3*11767Slinton static char sccsid[] = "@(#)scanner.c 1.5 03/30/83"; 49679Slinton 59679Slinton /* 69679Slinton * Debugger scanner. 79679Slinton */ 89679Slinton 99679Slinton #include "defs.h" 109679Slinton #include "scanner.h" 119679Slinton #include "main.h" 129679Slinton #include "keywords.h" 139679Slinton #include "tree.h" 149679Slinton #include "symbols.h" 159679Slinton #include "names.h" 169679Slinton #include "y.tab.h" 179679Slinton 189679Slinton #ifndef public 199679Slinton typedef int Token; 209679Slinton #endif 219679Slinton 229679Slinton public String initfile = ".dbxinit"; 239679Slinton 249679Slinton typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 259679Slinton 269679Slinton private Charclass class[256 + 1]; 279679Slinton private Charclass *lexclass = class + 1; 289679Slinton 299679Slinton #define isdigit(c) (lexclass[c] == NUM) 309679Slinton #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 319679Slinton #define ishexdigit(c) ( \ 329679Slinton isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 339679Slinton ) 349679Slinton 359679Slinton #define MAXLINESIZE 1024 369679Slinton 379679Slinton private File in; 389679Slinton private Char linebuf[MAXLINESIZE]; 399679Slinton private Char *curchar; 409679Slinton 419679Slinton #define MAXINCLDEPTH 10 429679Slinton 439679Slinton private struct { 449679Slinton File savefile; 459679Slinton Filename savefn; 469679Slinton int savelineno; 479679Slinton } inclinfo[MAXINCLDEPTH]; 489679Slinton 499679Slinton private unsigned int curinclindex; 509679Slinton 519679Slinton private Token getident(); 529679Slinton private Token getnum(); 539679Slinton private Token getstring(); 549679Slinton private Boolean eofinput(); 559679Slinton private Char charcon(); 569679Slinton private Char charlookup(); 579679Slinton 589679Slinton private enterlexclass(class, s) 599679Slinton Charclass class; 609679Slinton String s; 619679Slinton { 629679Slinton register char *p; 639679Slinton 649679Slinton for (p = s; *p != '\0'; p++) { 659679Slinton lexclass[*p] = class; 669679Slinton } 679679Slinton } 689679Slinton 699679Slinton public scanner_init() 709679Slinton { 719679Slinton register Integer i; 729679Slinton 739679Slinton for (i = 0; i < 257; i++) { 749679Slinton class[i] = OTHER; 759679Slinton } 769679Slinton enterlexclass(WHITE, " \t"); 779679Slinton enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 789679Slinton enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 799679Slinton enterlexclass(NUM, "0123456789"); 809679Slinton in = stdin; 819679Slinton errfilename = nil; 829679Slinton errlineno = 0; 839679Slinton curchar = linebuf; 849679Slinton linebuf[0] = '\0'; 859679Slinton } 869679Slinton 879679Slinton /* 889679Slinton * Read a single token. 899679Slinton * 909679Slinton * Input is line buffered. 919679Slinton * 929679Slinton * There are two "modes" of operation: one as in a compiler, 939679Slinton * and one for reading shell-like syntax. 949679Slinton */ 959679Slinton 969679Slinton private Boolean shellmode; 979679Slinton 989679Slinton public Token yylex() 999679Slinton { 1009679Slinton register int c; 1019679Slinton register char *p; 1029679Slinton register Token t; 1039679Slinton String line; 1049679Slinton 1059679Slinton p = curchar; 1069679Slinton if (*p == '\0') { 1079679Slinton do { 1089679Slinton if (isterm(in)) { 109*11767Slinton printf("> "); 110*11767Slinton fflush(stdout); 1119679Slinton } 1129679Slinton line = fgets(linebuf, MAXLINESIZE, in); 1139679Slinton } while (line == nil and not eofinput()); 1149679Slinton if (line == nil) { 1159679Slinton c = EOF; 1169679Slinton } else { 1179679Slinton p = linebuf; 1189679Slinton while (lexclass[*p] == WHITE) { 1199679Slinton p++; 1209679Slinton } 1219679Slinton shellmode = false; 1229679Slinton } 1239679Slinton } else { 1249679Slinton while (lexclass[*p] == WHITE) { 1259679Slinton p++; 1269679Slinton } 1279679Slinton } 1289679Slinton curchar = p; 1299679Slinton c = *p; 1309679Slinton if (lexclass[c] == ALPHA) { 1319679Slinton t = getident(); 1329679Slinton } else if (lexclass[c] == NUM) { 1339679Slinton t = getnum(); 1349679Slinton } else { 1359679Slinton ++curchar; 1369679Slinton switch (c) { 1379679Slinton case '\n': 1389679Slinton t = '\n'; 1399679Slinton if (errlineno != 0) { 1409679Slinton errlineno++; 1419679Slinton } 1429679Slinton break; 1439679Slinton 1449679Slinton case '"': 1459679Slinton case '\'': 1469679Slinton t = getstring(); 1479679Slinton break; 1489679Slinton 1499679Slinton case '.': 1509679Slinton if (shellmode) { 1519679Slinton --curchar; 1529679Slinton t = getident(); 1539679Slinton } else if (isdigit(*curchar)) { 1549679Slinton --curchar; 1559679Slinton t = getnum(); 1569679Slinton } else { 1579679Slinton t = '.'; 1589679Slinton } 1599679Slinton break; 1609679Slinton 1619679Slinton case '<': 1629679Slinton if (not shellmode and *curchar == '<') { 1639679Slinton ++curchar; 1649679Slinton t = LFORMER; 1659679Slinton } else { 1669679Slinton t = '<'; 1679679Slinton } 1689679Slinton break; 1699679Slinton 1709679Slinton case '>': 1719679Slinton if (not shellmode and *curchar == '>') { 1729679Slinton ++curchar; 1739679Slinton t = RFORMER; 1749679Slinton } else { 1759679Slinton t = '>'; 1769679Slinton } 1779679Slinton break; 1789679Slinton 1799679Slinton case '#': 1809679Slinton if (*curchar == '^') { 1819679Slinton ++curchar; 1829679Slinton t = ABSTRACTION; 1839679Slinton } else { 1849679Slinton t = '#'; 1859679Slinton } 1869679Slinton break; 1879679Slinton 1889679Slinton case '-': 1899679Slinton if (shellmode) { 1909679Slinton --curchar; 1919679Slinton t = getident(); 1929679Slinton } else if (*curchar == '>') { 1939679Slinton ++curchar; 1949679Slinton t = ARROW; 1959679Slinton } else { 1969679Slinton t = '-'; 1979679Slinton } 1989679Slinton break; 1999679Slinton 2009679Slinton case EOF: 2019679Slinton t = 0; 2029679Slinton break; 2039679Slinton 2049679Slinton default: 2059679Slinton if (shellmode and index("!&*()[]", c) == nil) { 2069679Slinton --curchar; 2079679Slinton t = getident(); 2089679Slinton } else { 2099679Slinton t = c; 2109679Slinton } 2119679Slinton break; 2129679Slinton } 2139679Slinton } 2149679Slinton # ifdef LEXDEBUG 2159679Slinton if (lexdebug) { 2169679Slinton fprintf(stderr, "yylex returns "); 2179679Slinton print_token(stderr, t); 2189679Slinton fprintf(stderr, "\n"); 2199679Slinton } 2209679Slinton # endif 2219679Slinton return t; 2229679Slinton } 2239679Slinton 2249679Slinton /* 2259679Slinton * Parser error handling. 2269679Slinton */ 2279679Slinton 2289679Slinton public yyerror(s) 2299679Slinton String s; 2309679Slinton { 2319679Slinton register Char *p, *tokenbegin, *tokenend; 2329679Slinton register Integer len; 2339679Slinton 2349679Slinton if (streq(s, "syntax error")) { 2359679Slinton beginerrmsg(); 2369679Slinton tokenend = curchar - 1; 2379679Slinton tokenbegin = tokenend; 2389679Slinton while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 2399679Slinton --tokenbegin; 2409679Slinton } 2419679Slinton len = tokenend - tokenbegin + 1; 2429679Slinton p = tokenbegin; 2439679Slinton if (p > &linebuf[0]) { 2449679Slinton while (lexclass[*p] == WHITE and p > &linebuf[0]) { 2459679Slinton --p; 2469679Slinton } 2479679Slinton } 2489679Slinton if (p == &linebuf[0]) { 2499679Slinton fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 2509679Slinton } else { 2519679Slinton fprintf(stderr, "syntax error"); 2529679Slinton if (len != 0) { 2539679Slinton fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 2549679Slinton } 2559679Slinton } 2569679Slinton enderrmsg(); 2579679Slinton } else { 2589679Slinton error(s); 2599679Slinton } 2609679Slinton } 2619679Slinton 2629679Slinton /* 2639679Slinton * Eat the current line. 2649679Slinton */ 2659679Slinton 2669679Slinton public gobble() 2679679Slinton { 2689679Slinton curchar = linebuf; 2699679Slinton linebuf[0] = '\0'; 2709679Slinton } 2719679Slinton 2729679Slinton /* 2739679Slinton * Scan an identifier and check to see if it's a keyword. 2749679Slinton */ 2759679Slinton 2769679Slinton private Token getident() 2779679Slinton { 2789679Slinton char buf[256]; 2799679Slinton register Char *p, *q; 2809679Slinton register Token t; 2819679Slinton 2829679Slinton p = curchar; 2839679Slinton q = buf; 2849679Slinton if (shellmode) { 2859679Slinton do { 2869679Slinton *q++ = *p++; 2879679Slinton } while (index(" \t\n!&<>*[]()", *p) == nil); 2889679Slinton } else { 2899679Slinton do { 2909679Slinton *q++ = *p++; 2919679Slinton } while (isalnum(*p)); 2929679Slinton } 2939679Slinton curchar = p; 2949679Slinton *q = '\0'; 2959679Slinton yylval.y_name = identname(buf, false); 2969679Slinton if (not shellmode) { 2979679Slinton t = findkeyword(yylval.y_name); 2989679Slinton if (t == nil) { 2999679Slinton t = NAME; 3009679Slinton } 3019679Slinton } else { 3029679Slinton t = NAME; 3039679Slinton } 3049679Slinton return t; 3059679Slinton } 3069679Slinton 3079679Slinton /* 3089679Slinton * Scan a number. 3099679Slinton */ 3109679Slinton 3119679Slinton private Token getnum() 3129679Slinton { 3139679Slinton char buf[256]; 3149679Slinton register Char *p, *q; 3159679Slinton register Token t; 3169679Slinton Integer base; 3179679Slinton 3189679Slinton p = curchar; 3199679Slinton q = buf; 3209679Slinton if (*p == '0') { 3219679Slinton if (*(p+1) == 'x') { 3229679Slinton p += 2; 3239679Slinton base = 16; 3249679Slinton } else { 3259679Slinton base = 8; 3269679Slinton } 3279679Slinton } else { 3289679Slinton base = 10; 3299679Slinton } 3309679Slinton if (base == 16) { 3319679Slinton do { 3329679Slinton *q++ = *p++; 3339679Slinton } while (ishexdigit(*p)); 3349679Slinton } else { 3359679Slinton do { 3369679Slinton *q++ = *p++; 3379679Slinton } while (isdigit(*p)); 3389679Slinton } 3399679Slinton if (*p == '.') { 3409679Slinton do { 3419679Slinton *q++ = *p++; 3429679Slinton } while (isdigit(*p)); 3439679Slinton if (*p == 'e' or *p == 'E') { 3449679Slinton p++; 3459679Slinton if (*p == '+' or *p == '-' or isdigit(*p)) { 3469679Slinton *q++ = 'e'; 3479679Slinton do { 3489679Slinton *q++ = *p++; 3499679Slinton } while (isdigit(*p)); 3509679Slinton } 3519679Slinton } 3529679Slinton *q = '\0'; 3539679Slinton yylval.y_real = atof(buf); 3549679Slinton t = REAL; 3559679Slinton } else { 3569679Slinton *q = '\0'; 3579679Slinton switch (base) { 3589679Slinton case 10: 3599679Slinton yylval.y_int = atol(buf); 3609679Slinton break; 3619679Slinton 3629679Slinton case 8: 3639679Slinton yylval.y_int = octal(buf); 3649679Slinton break; 3659679Slinton 3669679Slinton case 16: 3679679Slinton yylval.y_int = hex(buf); 3689679Slinton break; 3699679Slinton 3709679Slinton default: 3719679Slinton badcaseval(base); 3729679Slinton } 3739679Slinton t = INT; 3749679Slinton } 3759679Slinton curchar = p; 3769679Slinton return t; 3779679Slinton } 3789679Slinton 3799679Slinton /* 3809679Slinton * Convert a string of octal digits to an integer. 3819679Slinton */ 3829679Slinton 3839679Slinton private int octal(s) 3849679Slinton String s; 3859679Slinton { 3869679Slinton register Char *p; 3879679Slinton register Integer n; 3889679Slinton 3899679Slinton n = 0; 3909679Slinton for (p = s; *p != '\0'; p++) { 3919679Slinton n = 8*n + (*p - '0'); 3929679Slinton } 3939679Slinton return n; 3949679Slinton } 3959679Slinton 3969679Slinton /* 3979679Slinton * Convert a string of hexadecimal digits to an integer. 3989679Slinton */ 3999679Slinton 4009679Slinton private int hex(s) 4019679Slinton String s; 4029679Slinton { 4039679Slinton register Char *p; 4049679Slinton register Integer n; 4059679Slinton 4069679Slinton n = 0; 4079679Slinton for (p = s; *p != '\0'; p++) { 4089679Slinton n *= 16; 4099679Slinton if (*p >= 'a' and *p <= 'f') { 4109679Slinton n += (*p - 'a' + 10); 4119679Slinton } else if (*p >= 'A' and *p <= 'F') { 4129679Slinton n += (*p - 'A' + 10); 4139679Slinton } else { 4149679Slinton n += (*p - '0'); 4159679Slinton } 4169679Slinton } 4179679Slinton return n; 4189679Slinton } 4199679Slinton 4209679Slinton /* 4219679Slinton * Scan a string. 4229679Slinton */ 4239679Slinton 4249679Slinton private Token getstring() 4259679Slinton { 4269679Slinton char buf[256]; 4279679Slinton register Char *p, *q; 4289679Slinton Boolean endofstring; 4299679Slinton 4309679Slinton p = curchar; 4319679Slinton q = buf; 4329679Slinton endofstring = false; 4339679Slinton while (not endofstring) { 4349679Slinton if (*p == '\n' or *p == '\0') { 4359679Slinton error("non-terminated string"); 4369679Slinton endofstring = true; 43711122Slinton } else if (*p == '"' or *p == '\'') { 43811122Slinton if (*(p+1) != *p) { 4399679Slinton endofstring = true; 4409679Slinton } else { 4419679Slinton *q++ = *p; 4429679Slinton } 4439679Slinton } else { 44411559Slinton *q++ = charcon(p); 44511559Slinton p = curchar; 4469679Slinton } 4479679Slinton p++; 4489679Slinton } 4499679Slinton curchar = p; 4509679Slinton *q = '\0'; 4519679Slinton yylval.y_string = strdup(buf); 4529679Slinton return STRING; 4539679Slinton } 4549679Slinton 4559679Slinton /* 4569679Slinton * Process a character constant. 4579679Slinton * Watch out for backslashes. 4589679Slinton */ 4599679Slinton 46011559Slinton private Char charcon(p) 46111559Slinton char *p; 4629679Slinton { 46311559Slinton char c, buf[10], *q; 4649679Slinton 46511559Slinton if (*p == '\\') { 46611559Slinton ++p; 4679679Slinton if (*p != '\\') { 4689679Slinton q = buf; 4699679Slinton do { 4709679Slinton *q++ = *p++; 47111559Slinton } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 4729679Slinton *q = '\0'; 4739679Slinton if (isdigit(buf[0])) { 4749679Slinton c = (Char) octal(buf); 4759679Slinton } else { 4769679Slinton c = charlookup(buf); 4779679Slinton } 47811559Slinton curchar = p - 1; 4799679Slinton } else { 4809679Slinton c = '\\'; 4819679Slinton } 4829679Slinton } else { 48311559Slinton c = *p; 4849679Slinton } 4859679Slinton return c; 4869679Slinton } 4879679Slinton 4889679Slinton /* 4899679Slinton * Do a lookup for a ASCII character name. 4909679Slinton */ 4919679Slinton 4929679Slinton private String ascii[] = { 4939679Slinton "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 4949679Slinton "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 4959679Slinton "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 4969679Slinton "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 4979679Slinton "SP", nil 4989679Slinton }; 4999679Slinton 5009679Slinton private char charlookup(s) 5019679Slinton String s; 5029679Slinton { 5039679Slinton register int i; 5049679Slinton 5059679Slinton for (i = 0; ascii[i] != NULL; i++) { 5069679Slinton if (streq(s, ascii[i])) { 5079679Slinton return i; 5089679Slinton } 5099679Slinton } 5109679Slinton if (streq(s, "DEL")) { 5119679Slinton return 0177; 5129679Slinton } 5139679Slinton error("unknown ascii name \"%s\"", s); 5149679Slinton return '?'; 5159679Slinton } 5169679Slinton 5179679Slinton /* 5189679Slinton * Input file management routines. 5199679Slinton */ 5209679Slinton 5219679Slinton public setinput(filename) 5229679Slinton Filename filename; 5239679Slinton { 5249679Slinton File f; 5259679Slinton 5269679Slinton f = fopen(filename, "r"); 5279679Slinton if (f == nil) { 5289679Slinton error("can't open %s", filename); 5299679Slinton } else { 5309679Slinton if (curinclindex >= MAXINCLDEPTH) { 5319679Slinton error("unreasonable input nesting on \"%s\"", filename); 5329679Slinton } 5339679Slinton inclinfo[curinclindex].savefile = in; 5349679Slinton inclinfo[curinclindex].savefn = errfilename; 5359679Slinton inclinfo[curinclindex].savelineno = errlineno; 5369679Slinton curinclindex++; 5379679Slinton in = f; 5389679Slinton errfilename = filename; 5399679Slinton errlineno = 1; 5409679Slinton } 5419679Slinton } 5429679Slinton 5439679Slinton private Boolean eofinput() 5449679Slinton { 5459679Slinton register Boolean b; 5469679Slinton 5479679Slinton if (curinclindex == 0) { 5489679Slinton if (isterm(in)) { 5499679Slinton putchar('\n'); 5509679Slinton b = false; 5519679Slinton } else { 5529679Slinton b = true; 5539679Slinton } 5549679Slinton } else { 5559679Slinton fclose(in); 5569679Slinton --curinclindex; 5579679Slinton in = inclinfo[curinclindex].savefile; 5589679Slinton errfilename = inclinfo[curinclindex].savefn; 5599679Slinton errlineno = inclinfo[curinclindex].savelineno; 5609679Slinton b = false; 5619679Slinton } 5629679Slinton return b; 5639679Slinton } 5649679Slinton 5659679Slinton /* 5669679Slinton * Pop the current input. Return whether successful. 5679679Slinton */ 5689679Slinton 5699679Slinton public Boolean popinput() 5709679Slinton { 5719679Slinton Boolean b; 5729679Slinton 5739679Slinton if (curinclindex == 0) { 5749679Slinton b = false; 5759679Slinton } else { 5769679Slinton b = (Boolean) (not eofinput()); 5779679Slinton } 5789679Slinton return b; 5799679Slinton } 5809679Slinton 5819679Slinton /* 5829679Slinton * Return whether we are currently reading from standard input. 5839679Slinton */ 5849679Slinton 5859679Slinton public Boolean isstdin() 5869679Slinton { 5879679Slinton return (Boolean) (in == stdin); 5889679Slinton } 5899679Slinton 5909679Slinton /* 5919679Slinton * Send the current line to the shell. 5929679Slinton */ 5939679Slinton 5949679Slinton public shellline() 5959679Slinton { 5969679Slinton register char *p; 5979679Slinton 5989679Slinton p = curchar; 5999679Slinton while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 6009679Slinton ++p; 6019679Slinton } 6029679Slinton shell(p); 6039679Slinton if (*p == '\0' and isterm(in)) { 6049679Slinton putchar('\n'); 6059679Slinton } 6069679Slinton erecover(); 6079679Slinton } 6089679Slinton 6099679Slinton /* 6109679Slinton * Read the rest of the current line in "shell mode". 6119679Slinton */ 6129679Slinton 6139679Slinton public beginshellmode() 6149679Slinton { 6159679Slinton shellmode = true; 6169679Slinton } 6179679Slinton 6189679Slinton /* 6199679Slinton * Print out a token for debugging. 6209679Slinton */ 6219679Slinton 6229679Slinton public print_token(f, t) 6239679Slinton File f; 6249679Slinton Token t; 6259679Slinton { 6269679Slinton if (t == '\n') { 6279679Slinton fprintf(f, "char '\\n'"); 6289679Slinton } else if (t == EOF) { 6299679Slinton fprintf(f, "EOF"); 6309679Slinton } else if (t < 256) { 6319679Slinton fprintf(f, "char '%c'", t); 6329679Slinton } else { 6339679Slinton fprintf(f, "\"%s\"", keywdstring(t)); 6349679Slinton } 6359679Slinton } 636