1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.5 03/30/83"; 4 5 /* 6 * Debugger scanner. 7 */ 8 9 #include "defs.h" 10 #include "scanner.h" 11 #include "main.h" 12 #include "keywords.h" 13 #include "tree.h" 14 #include "symbols.h" 15 #include "names.h" 16 #include "y.tab.h" 17 18 #ifndef public 19 typedef int Token; 20 #endif 21 22 public String initfile = ".dbxinit"; 23 24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 25 26 private Charclass class[256 + 1]; 27 private Charclass *lexclass = class + 1; 28 29 #define isdigit(c) (lexclass[c] == NUM) 30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 31 #define ishexdigit(c) ( \ 32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 33 ) 34 35 #define MAXLINESIZE 1024 36 37 private File in; 38 private Char linebuf[MAXLINESIZE]; 39 private Char *curchar; 40 41 #define MAXINCLDEPTH 10 42 43 private struct { 44 File savefile; 45 Filename savefn; 46 int savelineno; 47 } inclinfo[MAXINCLDEPTH]; 48 49 private unsigned int curinclindex; 50 51 private Token getident(); 52 private Token getnum(); 53 private Token getstring(); 54 private Boolean eofinput(); 55 private Char charcon(); 56 private Char charlookup(); 57 58 private enterlexclass(class, s) 59 Charclass class; 60 String s; 61 { 62 register char *p; 63 64 for (p = s; *p != '\0'; p++) { 65 lexclass[*p] = class; 66 } 67 } 68 69 public scanner_init() 70 { 71 register Integer i; 72 73 for (i = 0; i < 257; i++) { 74 class[i] = OTHER; 75 } 76 enterlexclass(WHITE, " \t"); 77 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 78 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 79 enterlexclass(NUM, "0123456789"); 80 in = stdin; 81 errfilename = nil; 82 errlineno = 0; 83 curchar = linebuf; 84 linebuf[0] = '\0'; 85 } 86 87 /* 88 * Read a single token. 89 * 90 * Input is line buffered. 91 * 92 * There are two "modes" of operation: one as in a compiler, 93 * and one for reading shell-like syntax. 94 */ 95 96 private Boolean shellmode; 97 98 public Token yylex() 99 { 100 register int c; 101 register char *p; 102 register Token t; 103 String line; 104 105 p = curchar; 106 if (*p == '\0') { 107 do { 108 if (isterm(in)) { 109 printf("> "); 110 fflush(stdout); 111 } 112 line = fgets(linebuf, MAXLINESIZE, in); 113 } while (line == nil and not eofinput()); 114 if (line == nil) { 115 c = EOF; 116 } else { 117 p = linebuf; 118 while (lexclass[*p] == WHITE) { 119 p++; 120 } 121 shellmode = false; 122 } 123 } else { 124 while (lexclass[*p] == WHITE) { 125 p++; 126 } 127 } 128 curchar = p; 129 c = *p; 130 if (lexclass[c] == ALPHA) { 131 t = getident(); 132 } else if (lexclass[c] == NUM) { 133 t = getnum(); 134 } else { 135 ++curchar; 136 switch (c) { 137 case '\n': 138 t = '\n'; 139 if (errlineno != 0) { 140 errlineno++; 141 } 142 break; 143 144 case '"': 145 case '\'': 146 t = getstring(); 147 break; 148 149 case '.': 150 if (shellmode) { 151 --curchar; 152 t = getident(); 153 } else if (isdigit(*curchar)) { 154 --curchar; 155 t = getnum(); 156 } else { 157 t = '.'; 158 } 159 break; 160 161 case '<': 162 if (not shellmode and *curchar == '<') { 163 ++curchar; 164 t = LFORMER; 165 } else { 166 t = '<'; 167 } 168 break; 169 170 case '>': 171 if (not shellmode and *curchar == '>') { 172 ++curchar; 173 t = RFORMER; 174 } else { 175 t = '>'; 176 } 177 break; 178 179 case '#': 180 if (*curchar == '^') { 181 ++curchar; 182 t = ABSTRACTION; 183 } else { 184 t = '#'; 185 } 186 break; 187 188 case '-': 189 if (shellmode) { 190 --curchar; 191 t = getident(); 192 } else if (*curchar == '>') { 193 ++curchar; 194 t = ARROW; 195 } else { 196 t = '-'; 197 } 198 break; 199 200 case EOF: 201 t = 0; 202 break; 203 204 default: 205 if (shellmode and index("!&*()[]", c) == nil) { 206 --curchar; 207 t = getident(); 208 } else { 209 t = c; 210 } 211 break; 212 } 213 } 214 # ifdef LEXDEBUG 215 if (lexdebug) { 216 fprintf(stderr, "yylex returns "); 217 print_token(stderr, t); 218 fprintf(stderr, "\n"); 219 } 220 # endif 221 return t; 222 } 223 224 /* 225 * Parser error handling. 226 */ 227 228 public yyerror(s) 229 String s; 230 { 231 register Char *p, *tokenbegin, *tokenend; 232 register Integer len; 233 234 if (streq(s, "syntax error")) { 235 beginerrmsg(); 236 tokenend = curchar - 1; 237 tokenbegin = tokenend; 238 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 239 --tokenbegin; 240 } 241 len = tokenend - tokenbegin + 1; 242 p = tokenbegin; 243 if (p > &linebuf[0]) { 244 while (lexclass[*p] == WHITE and p > &linebuf[0]) { 245 --p; 246 } 247 } 248 if (p == &linebuf[0]) { 249 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 250 } else { 251 fprintf(stderr, "syntax error"); 252 if (len != 0) { 253 fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 254 } 255 } 256 enderrmsg(); 257 } else { 258 error(s); 259 } 260 } 261 262 /* 263 * Eat the current line. 264 */ 265 266 public gobble() 267 { 268 curchar = linebuf; 269 linebuf[0] = '\0'; 270 } 271 272 /* 273 * Scan an identifier and check to see if it's a keyword. 274 */ 275 276 private Token getident() 277 { 278 char buf[256]; 279 register Char *p, *q; 280 register Token t; 281 282 p = curchar; 283 q = buf; 284 if (shellmode) { 285 do { 286 *q++ = *p++; 287 } while (index(" \t\n!&<>*[]()", *p) == nil); 288 } else { 289 do { 290 *q++ = *p++; 291 } while (isalnum(*p)); 292 } 293 curchar = p; 294 *q = '\0'; 295 yylval.y_name = identname(buf, false); 296 if (not shellmode) { 297 t = findkeyword(yylval.y_name); 298 if (t == nil) { 299 t = NAME; 300 } 301 } else { 302 t = NAME; 303 } 304 return t; 305 } 306 307 /* 308 * Scan a number. 309 */ 310 311 private Token getnum() 312 { 313 char buf[256]; 314 register Char *p, *q; 315 register Token t; 316 Integer base; 317 318 p = curchar; 319 q = buf; 320 if (*p == '0') { 321 if (*(p+1) == 'x') { 322 p += 2; 323 base = 16; 324 } else { 325 base = 8; 326 } 327 } else { 328 base = 10; 329 } 330 if (base == 16) { 331 do { 332 *q++ = *p++; 333 } while (ishexdigit(*p)); 334 } else { 335 do { 336 *q++ = *p++; 337 } while (isdigit(*p)); 338 } 339 if (*p == '.') { 340 do { 341 *q++ = *p++; 342 } while (isdigit(*p)); 343 if (*p == 'e' or *p == 'E') { 344 p++; 345 if (*p == '+' or *p == '-' or isdigit(*p)) { 346 *q++ = 'e'; 347 do { 348 *q++ = *p++; 349 } while (isdigit(*p)); 350 } 351 } 352 *q = '\0'; 353 yylval.y_real = atof(buf); 354 t = REAL; 355 } else { 356 *q = '\0'; 357 switch (base) { 358 case 10: 359 yylval.y_int = atol(buf); 360 break; 361 362 case 8: 363 yylval.y_int = octal(buf); 364 break; 365 366 case 16: 367 yylval.y_int = hex(buf); 368 break; 369 370 default: 371 badcaseval(base); 372 } 373 t = INT; 374 } 375 curchar = p; 376 return t; 377 } 378 379 /* 380 * Convert a string of octal digits to an integer. 381 */ 382 383 private int octal(s) 384 String s; 385 { 386 register Char *p; 387 register Integer n; 388 389 n = 0; 390 for (p = s; *p != '\0'; p++) { 391 n = 8*n + (*p - '0'); 392 } 393 return n; 394 } 395 396 /* 397 * Convert a string of hexadecimal digits to an integer. 398 */ 399 400 private int hex(s) 401 String s; 402 { 403 register Char *p; 404 register Integer n; 405 406 n = 0; 407 for (p = s; *p != '\0'; p++) { 408 n *= 16; 409 if (*p >= 'a' and *p <= 'f') { 410 n += (*p - 'a' + 10); 411 } else if (*p >= 'A' and *p <= 'F') { 412 n += (*p - 'A' + 10); 413 } else { 414 n += (*p - '0'); 415 } 416 } 417 return n; 418 } 419 420 /* 421 * Scan a string. 422 */ 423 424 private Token getstring() 425 { 426 char buf[256]; 427 register Char *p, *q; 428 Boolean endofstring; 429 430 p = curchar; 431 q = buf; 432 endofstring = false; 433 while (not endofstring) { 434 if (*p == '\n' or *p == '\0') { 435 error("non-terminated string"); 436 endofstring = true; 437 } else if (*p == '"' or *p == '\'') { 438 if (*(p+1) != *p) { 439 endofstring = true; 440 } else { 441 *q++ = *p; 442 } 443 } else { 444 *q++ = charcon(p); 445 p = curchar; 446 } 447 p++; 448 } 449 curchar = p; 450 *q = '\0'; 451 yylval.y_string = strdup(buf); 452 return STRING; 453 } 454 455 /* 456 * Process a character constant. 457 * Watch out for backslashes. 458 */ 459 460 private Char charcon(p) 461 char *p; 462 { 463 char c, buf[10], *q; 464 465 if (*p == '\\') { 466 ++p; 467 if (*p != '\\') { 468 q = buf; 469 do { 470 *q++ = *p++; 471 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 472 *q = '\0'; 473 if (isdigit(buf[0])) { 474 c = (Char) octal(buf); 475 } else { 476 c = charlookup(buf); 477 } 478 curchar = p - 1; 479 } else { 480 c = '\\'; 481 } 482 } else { 483 c = *p; 484 } 485 return c; 486 } 487 488 /* 489 * Do a lookup for a ASCII character name. 490 */ 491 492 private String ascii[] = { 493 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 494 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 495 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 496 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 497 "SP", nil 498 }; 499 500 private char charlookup(s) 501 String s; 502 { 503 register int i; 504 505 for (i = 0; ascii[i] != NULL; i++) { 506 if (streq(s, ascii[i])) { 507 return i; 508 } 509 } 510 if (streq(s, "DEL")) { 511 return 0177; 512 } 513 error("unknown ascii name \"%s\"", s); 514 return '?'; 515 } 516 517 /* 518 * Input file management routines. 519 */ 520 521 public setinput(filename) 522 Filename filename; 523 { 524 File f; 525 526 f = fopen(filename, "r"); 527 if (f == nil) { 528 error("can't open %s", filename); 529 } else { 530 if (curinclindex >= MAXINCLDEPTH) { 531 error("unreasonable input nesting on \"%s\"", filename); 532 } 533 inclinfo[curinclindex].savefile = in; 534 inclinfo[curinclindex].savefn = errfilename; 535 inclinfo[curinclindex].savelineno = errlineno; 536 curinclindex++; 537 in = f; 538 errfilename = filename; 539 errlineno = 1; 540 } 541 } 542 543 private Boolean eofinput() 544 { 545 register Boolean b; 546 547 if (curinclindex == 0) { 548 if (isterm(in)) { 549 putchar('\n'); 550 b = false; 551 } else { 552 b = true; 553 } 554 } else { 555 fclose(in); 556 --curinclindex; 557 in = inclinfo[curinclindex].savefile; 558 errfilename = inclinfo[curinclindex].savefn; 559 errlineno = inclinfo[curinclindex].savelineno; 560 b = false; 561 } 562 return b; 563 } 564 565 /* 566 * Pop the current input. Return whether successful. 567 */ 568 569 public Boolean popinput() 570 { 571 Boolean b; 572 573 if (curinclindex == 0) { 574 b = false; 575 } else { 576 b = (Boolean) (not eofinput()); 577 } 578 return b; 579 } 580 581 /* 582 * Return whether we are currently reading from standard input. 583 */ 584 585 public Boolean isstdin() 586 { 587 return (Boolean) (in == stdin); 588 } 589 590 /* 591 * Send the current line to the shell. 592 */ 593 594 public shellline() 595 { 596 register char *p; 597 598 p = curchar; 599 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 600 ++p; 601 } 602 shell(p); 603 if (*p == '\0' and isterm(in)) { 604 putchar('\n'); 605 } 606 erecover(); 607 } 608 609 /* 610 * Read the rest of the current line in "shell mode". 611 */ 612 613 public beginshellmode() 614 { 615 shellmode = true; 616 } 617 618 /* 619 * Print out a token for debugging. 620 */ 621 622 public print_token(f, t) 623 File f; 624 Token t; 625 { 626 if (t == '\n') { 627 fprintf(f, "char '\\n'"); 628 } else if (t == EOF) { 629 fprintf(f, "EOF"); 630 } else if (t < 256) { 631 fprintf(f, "char '%c'", t); 632 } else { 633 fprintf(f, "\"%s\"", keywdstring(t)); 634 } 635 } 636