1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.4 03/13/83"; 4 5 /* 6 * Debugger scanner. 7 */ 8 9 #include "defs.h" 10 #include "scanner.h" 11 #include "main.h" 12 #include "keywords.h" 13 #include "tree.h" 14 #include "symbols.h" 15 #include "names.h" 16 #include "y.tab.h" 17 18 #ifndef public 19 typedef int Token; 20 #endif 21 22 public String initfile = ".dbxinit"; 23 24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 25 26 private Charclass class[256 + 1]; 27 private Charclass *lexclass = class + 1; 28 29 #define isdigit(c) (lexclass[c] == NUM) 30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 31 #define ishexdigit(c) ( \ 32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 33 ) 34 35 #define MAXLINESIZE 1024 36 37 private File in; 38 private Char linebuf[MAXLINESIZE]; 39 private Char *curchar; 40 41 #define MAXINCLDEPTH 10 42 43 private struct { 44 File savefile; 45 Filename savefn; 46 int savelineno; 47 } inclinfo[MAXINCLDEPTH]; 48 49 private unsigned int curinclindex; 50 51 private Boolean firsttoken = true; 52 private Boolean firstinit = true; 53 54 private Token getident(); 55 private Token getnum(); 56 private Token getstring(); 57 private Boolean eofinput(); 58 private Char charcon(); 59 private Char charlookup(); 60 61 private enterlexclass(class, s) 62 Charclass class; 63 String s; 64 { 65 register char *p; 66 67 for (p = s; *p != '\0'; p++) { 68 lexclass[*p] = class; 69 } 70 } 71 72 public scanner_init() 73 { 74 register Integer i; 75 76 for (i = 0; i < 257; i++) { 77 class[i] = OTHER; 78 } 79 enterlexclass(WHITE, " \t"); 80 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 81 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 82 enterlexclass(NUM, "0123456789"); 83 in = stdin; 84 errfilename = nil; 85 errlineno = 0; 86 curchar = linebuf; 87 linebuf[0] = '\0'; 88 if (runfirst) { 89 firstinit = false; 90 firsttoken = false; 91 } else if (firstinit and isterm(in)) { 92 firstinit = false; 93 printf("> "); 94 fflush(stdout); 95 } 96 } 97 98 /* 99 * Read a single token. 100 * 101 * Input is line buffered. 102 * 103 * There are two "modes" of operation: one as in a compiler, 104 * and one for reading shell-like syntax. 105 */ 106 107 private Boolean shellmode; 108 109 public Token yylex() 110 { 111 register int c; 112 register char *p; 113 register Token t; 114 String line; 115 116 p = curchar; 117 if (*p == '\0') { 118 do { 119 if (isterm(in)) { 120 if (firsttoken) { 121 firsttoken = false; 122 } else { 123 printf("> "); 124 fflush(stdout); 125 } 126 } 127 line = fgets(linebuf, MAXLINESIZE, in); 128 } while (line == nil and not eofinput()); 129 if (line == nil) { 130 c = EOF; 131 } else { 132 p = linebuf; 133 while (lexclass[*p] == WHITE) { 134 p++; 135 } 136 shellmode = false; 137 } 138 } else { 139 while (lexclass[*p] == WHITE) { 140 p++; 141 } 142 } 143 curchar = p; 144 c = *p; 145 if (lexclass[c] == ALPHA) { 146 t = getident(); 147 } else if (lexclass[c] == NUM) { 148 t = getnum(); 149 } else { 150 ++curchar; 151 switch (c) { 152 case '\n': 153 t = '\n'; 154 if (errlineno != 0) { 155 errlineno++; 156 } 157 break; 158 159 case '"': 160 case '\'': 161 t = getstring(); 162 break; 163 164 case '.': 165 if (shellmode) { 166 --curchar; 167 t = getident(); 168 } else if (isdigit(*curchar)) { 169 --curchar; 170 t = getnum(); 171 } else { 172 t = '.'; 173 } 174 break; 175 176 case '<': 177 if (not shellmode and *curchar == '<') { 178 ++curchar; 179 t = LFORMER; 180 } else { 181 t = '<'; 182 } 183 break; 184 185 case '>': 186 if (not shellmode and *curchar == '>') { 187 ++curchar; 188 t = RFORMER; 189 } else { 190 t = '>'; 191 } 192 break; 193 194 case '#': 195 if (*curchar == '^') { 196 ++curchar; 197 t = ABSTRACTION; 198 } else { 199 t = '#'; 200 } 201 break; 202 203 case '-': 204 if (shellmode) { 205 --curchar; 206 t = getident(); 207 } else if (*curchar == '>') { 208 ++curchar; 209 t = ARROW; 210 } else { 211 t = '-'; 212 } 213 break; 214 215 case EOF: 216 t = 0; 217 break; 218 219 default: 220 if (shellmode and index("!&*()[]", c) == nil) { 221 --curchar; 222 t = getident(); 223 } else { 224 t = c; 225 } 226 break; 227 } 228 } 229 # ifdef LEXDEBUG 230 if (lexdebug) { 231 fprintf(stderr, "yylex returns "); 232 print_token(stderr, t); 233 fprintf(stderr, "\n"); 234 } 235 # endif 236 return t; 237 } 238 239 /* 240 * Parser error handling. 241 */ 242 243 public yyerror(s) 244 String s; 245 { 246 register Char *p, *tokenbegin, *tokenend; 247 register Integer len; 248 249 if (streq(s, "syntax error")) { 250 beginerrmsg(); 251 tokenend = curchar - 1; 252 tokenbegin = tokenend; 253 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 254 --tokenbegin; 255 } 256 len = tokenend - tokenbegin + 1; 257 p = tokenbegin; 258 if (p > &linebuf[0]) { 259 while (lexclass[*p] == WHITE and p > &linebuf[0]) { 260 --p; 261 } 262 } 263 if (p == &linebuf[0]) { 264 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 265 } else { 266 fprintf(stderr, "syntax error"); 267 if (len != 0) { 268 fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 269 } 270 } 271 enderrmsg(); 272 } else { 273 error(s); 274 } 275 } 276 277 /* 278 * Eat the current line. 279 */ 280 281 public gobble() 282 { 283 curchar = linebuf; 284 linebuf[0] = '\0'; 285 } 286 287 /* 288 * Scan an identifier and check to see if it's a keyword. 289 */ 290 291 private Token getident() 292 { 293 char buf[256]; 294 register Char *p, *q; 295 register Token t; 296 297 p = curchar; 298 q = buf; 299 if (shellmode) { 300 do { 301 *q++ = *p++; 302 } while (index(" \t\n!&<>*[]()", *p) == nil); 303 } else { 304 do { 305 *q++ = *p++; 306 } while (isalnum(*p)); 307 } 308 curchar = p; 309 *q = '\0'; 310 yylval.y_name = identname(buf, false); 311 if (not shellmode) { 312 t = findkeyword(yylval.y_name); 313 if (t == nil) { 314 t = NAME; 315 } 316 } else { 317 t = NAME; 318 } 319 return t; 320 } 321 322 /* 323 * Scan a number. 324 */ 325 326 private Token getnum() 327 { 328 char buf[256]; 329 register Char *p, *q; 330 register Token t; 331 Integer base; 332 333 p = curchar; 334 q = buf; 335 if (*p == '0') { 336 if (*(p+1) == 'x') { 337 p += 2; 338 base = 16; 339 } else { 340 base = 8; 341 } 342 } else { 343 base = 10; 344 } 345 if (base == 16) { 346 do { 347 *q++ = *p++; 348 } while (ishexdigit(*p)); 349 } else { 350 do { 351 *q++ = *p++; 352 } while (isdigit(*p)); 353 } 354 if (*p == '.') { 355 do { 356 *q++ = *p++; 357 } while (isdigit(*p)); 358 if (*p == 'e' or *p == 'E') { 359 p++; 360 if (*p == '+' or *p == '-' or isdigit(*p)) { 361 *q++ = 'e'; 362 do { 363 *q++ = *p++; 364 } while (isdigit(*p)); 365 } 366 } 367 *q = '\0'; 368 yylval.y_real = atof(buf); 369 t = REAL; 370 } else { 371 *q = '\0'; 372 switch (base) { 373 case 10: 374 yylval.y_int = atol(buf); 375 break; 376 377 case 8: 378 yylval.y_int = octal(buf); 379 break; 380 381 case 16: 382 yylval.y_int = hex(buf); 383 break; 384 385 default: 386 badcaseval(base); 387 } 388 t = INT; 389 } 390 curchar = p; 391 return t; 392 } 393 394 /* 395 * Convert a string of octal digits to an integer. 396 */ 397 398 private int octal(s) 399 String s; 400 { 401 register Char *p; 402 register Integer n; 403 404 n = 0; 405 for (p = s; *p != '\0'; p++) { 406 n = 8*n + (*p - '0'); 407 } 408 return n; 409 } 410 411 /* 412 * Convert a string of hexadecimal digits to an integer. 413 */ 414 415 private int hex(s) 416 String s; 417 { 418 register Char *p; 419 register Integer n; 420 421 n = 0; 422 for (p = s; *p != '\0'; p++) { 423 n *= 16; 424 if (*p >= 'a' and *p <= 'f') { 425 n += (*p - 'a' + 10); 426 } else if (*p >= 'A' and *p <= 'F') { 427 n += (*p - 'A' + 10); 428 } else { 429 n += (*p - '0'); 430 } 431 } 432 return n; 433 } 434 435 /* 436 * Scan a string. 437 */ 438 439 private Token getstring() 440 { 441 char buf[256]; 442 register Char *p, *q; 443 Boolean endofstring; 444 445 p = curchar; 446 q = buf; 447 endofstring = false; 448 while (not endofstring) { 449 if (*p == '\n' or *p == '\0') { 450 error("non-terminated string"); 451 endofstring = true; 452 } else if (*p == '"' or *p == '\'') { 453 if (*(p+1) != *p) { 454 endofstring = true; 455 } else { 456 *q++ = *p; 457 } 458 } else { 459 *q++ = charcon(p); 460 p = curchar; 461 } 462 p++; 463 } 464 curchar = p; 465 *q = '\0'; 466 yylval.y_string = strdup(buf); 467 return STRING; 468 } 469 470 /* 471 * Process a character constant. 472 * Watch out for backslashes. 473 */ 474 475 private Char charcon(p) 476 char *p; 477 { 478 char c, buf[10], *q; 479 480 if (*p == '\\') { 481 ++p; 482 if (*p != '\\') { 483 q = buf; 484 do { 485 *q++ = *p++; 486 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 487 *q = '\0'; 488 if (isdigit(buf[0])) { 489 c = (Char) octal(buf); 490 } else { 491 c = charlookup(buf); 492 } 493 curchar = p - 1; 494 } else { 495 c = '\\'; 496 } 497 } else { 498 c = *p; 499 } 500 return c; 501 } 502 503 /* 504 * Do a lookup for a ASCII character name. 505 */ 506 507 private String ascii[] = { 508 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 509 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 510 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 511 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 512 "SP", nil 513 }; 514 515 private char charlookup(s) 516 String s; 517 { 518 register int i; 519 520 for (i = 0; ascii[i] != NULL; i++) { 521 if (streq(s, ascii[i])) { 522 return i; 523 } 524 } 525 if (streq(s, "DEL")) { 526 return 0177; 527 } 528 error("unknown ascii name \"%s\"", s); 529 return '?'; 530 } 531 532 /* 533 * Input file management routines. 534 */ 535 536 public setinput(filename) 537 Filename filename; 538 { 539 File f; 540 541 f = fopen(filename, "r"); 542 if (f == nil) { 543 error("can't open %s", filename); 544 } else { 545 if (curinclindex >= MAXINCLDEPTH) { 546 error("unreasonable input nesting on \"%s\"", filename); 547 } 548 inclinfo[curinclindex].savefile = in; 549 inclinfo[curinclindex].savefn = errfilename; 550 inclinfo[curinclindex].savelineno = errlineno; 551 curinclindex++; 552 in = f; 553 errfilename = filename; 554 errlineno = 1; 555 } 556 } 557 558 private Boolean eofinput() 559 { 560 register Boolean b; 561 562 if (curinclindex == 0) { 563 if (isterm(in)) { 564 putchar('\n'); 565 b = false; 566 } else { 567 b = true; 568 } 569 } else { 570 fclose(in); 571 --curinclindex; 572 in = inclinfo[curinclindex].savefile; 573 errfilename = inclinfo[curinclindex].savefn; 574 errlineno = inclinfo[curinclindex].savelineno; 575 b = false; 576 } 577 return b; 578 } 579 580 /* 581 * Pop the current input. Return whether successful. 582 */ 583 584 public Boolean popinput() 585 { 586 Boolean b; 587 588 if (curinclindex == 0) { 589 b = false; 590 } else { 591 b = (Boolean) (not eofinput()); 592 } 593 return b; 594 } 595 596 /* 597 * Return whether we are currently reading from standard input. 598 */ 599 600 public Boolean isstdin() 601 { 602 return (Boolean) (in == stdin); 603 } 604 605 /* 606 * Send the current line to the shell. 607 */ 608 609 public shellline() 610 { 611 register char *p; 612 613 p = curchar; 614 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 615 ++p; 616 } 617 shell(p); 618 if (*p == '\0' and isterm(in)) { 619 putchar('\n'); 620 } 621 erecover(); 622 } 623 624 /* 625 * Read the rest of the current line in "shell mode". 626 */ 627 628 public beginshellmode() 629 { 630 shellmode = true; 631 } 632 633 /* 634 * Print out a token for debugging. 635 */ 636 637 public print_token(f, t) 638 File f; 639 Token t; 640 { 641 if (t == '\n') { 642 fprintf(f, "char '\\n'"); 643 } else if (t == EOF) { 644 fprintf(f, "EOF"); 645 } else if (t < 256) { 646 fprintf(f, "char '%c'", t); 647 } else { 648 fprintf(f, "\"%s\"", keywdstring(t)); 649 } 650 } 651