1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.12 (Berkeley) 03/01/85"; 4 5 static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $"; 6 7 /* 8 * Debugger scanner. 9 */ 10 11 #include "defs.h" 12 #include "scanner.h" 13 #include "main.h" 14 #include "keywords.h" 15 #include "tree.h" 16 #include "symbols.h" 17 #include "names.h" 18 #include "y.tab.h" 19 20 #ifndef public 21 typedef int Token; 22 23 #define MAXLINESIZE 10240 24 25 #endif 26 27 public String initfile = ".dbxinit"; 28 29 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 30 31 private Charclass class[256 + 1]; 32 private Charclass *lexclass = class + 1; 33 34 #define isdigit(c) (lexclass[c] == NUM) 35 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 36 #define ishexdigit(c) ( \ 37 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 38 ) 39 40 public boolean chkalias; 41 public char scanner_linebuf[MAXLINESIZE]; 42 43 private File in; 44 private char *curchar, *prevchar; 45 46 #define MAXINCLDEPTH 10 47 48 private struct { 49 File savefile; 50 Filename savefn; 51 int savelineno; 52 } inclinfo[MAXINCLDEPTH]; 53 54 private unsigned int curinclindex; 55 56 private Token getident(); 57 private Token getnum(); 58 private Token getstring(); 59 private Boolean eofinput(); 60 private char charcon(); 61 62 private enterlexclass(class, s) 63 Charclass class; 64 String s; 65 { 66 register char *p; 67 68 for (p = s; *p != '\0'; p++) { 69 lexclass[*p] = class; 70 } 71 } 72 73 public scanner_init() 74 { 75 register Integer i; 76 77 for (i = 0; i < 257; i++) { 78 class[i] = OTHER; 79 } 80 enterlexclass(WHITE, " \t"); 81 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 82 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 83 enterlexclass(NUM, "0123456789"); 84 in = stdin; 85 errfilename = nil; 86 errlineno = 0; 87 curchar = scanner_linebuf; 88 scanner_linebuf[0] = '\0'; 89 chkalias = true; 90 } 91 92 /* 93 * Read a single token. 94 * 95 * The input is line buffered. Tokens cannot cross line boundaries. 96 * 97 * There are two "modes" of operation: one as in a compiler, 98 * and one for reading shell-like syntax. In the first mode 99 * there is the additional choice of doing alias processing. 100 */ 101 102 private Boolean shellmode; 103 104 public Token yylex() 105 { 106 register int c; 107 register char *p; 108 register Token t; 109 String line; 110 integer n; 111 112 p = curchar; 113 if (*p == '\0') { 114 do { 115 if (isterm(in)) { 116 printf("(%s) ", cmdname); 117 } 118 fflush(stdout); 119 line = fgets(scanner_linebuf, MAXLINESIZE, in); 120 } while (line == nil and not eofinput()); 121 if (line == nil) { 122 c = EOF; 123 } else { 124 p = scanner_linebuf; 125 while (lexclass[*p] == WHITE) { 126 p++; 127 } 128 shellmode = false; 129 } 130 chkalias = true; 131 } else { 132 while (lexclass[*p] == WHITE) { 133 p++; 134 } 135 } 136 curchar = p; 137 prevchar = curchar; 138 c = *p; 139 if (lexclass[c] == ALPHA) { 140 t = getident(chkalias); 141 } else if (lexclass[c] == NUM) { 142 if (shellmode) { 143 t = getident(chkalias); 144 } else { 145 t = getnum(); 146 } 147 } else { 148 ++curchar; 149 switch (c) { 150 case '\n': 151 t = '\n'; 152 if (errlineno != 0) { 153 errlineno++; 154 } 155 break; 156 157 case '"': 158 case '\'': 159 t = getstring(c); 160 break; 161 162 case '.': 163 if (shellmode) { 164 --curchar; 165 t = getident(chkalias); 166 } else if (isdigit(*curchar)) { 167 --curchar; 168 t = getnum(); 169 } else { 170 t = '.'; 171 } 172 break; 173 174 case '-': 175 if (shellmode) { 176 --curchar; 177 t = getident(chkalias); 178 } else if (*curchar == '>') { 179 ++curchar; 180 t = ARROW; 181 } else { 182 t = '-'; 183 } 184 break; 185 186 case '#': 187 if (not isterm(in)) { 188 *p = '\0'; 189 curchar = p; 190 t = '\n'; 191 ++errlineno; 192 } else { 193 t = '#'; 194 } 195 break; 196 197 case '\\': 198 if (*(p+1) == '\n') { 199 n = MAXLINESIZE - (p - &scanner_linebuf[0]); 200 if (n > 1) { 201 if (fgets(p, n, in) == nil) { 202 t = 0; 203 } else { 204 curchar = p; 205 t = yylex(); 206 } 207 } else { 208 t = '\\'; 209 } 210 } else { 211 t = '\\'; 212 } 213 break; 214 215 case EOF: 216 t = 0; 217 break; 218 219 default: 220 if (shellmode and index("!&*<>()[]", c) == nil) { 221 --curchar; 222 t = getident(chkalias); 223 } else { 224 t = c; 225 } 226 break; 227 } 228 } 229 chkalias = false; 230 # ifdef LEXDEBUG 231 if (lexdebug) { 232 fprintf(stderr, "yylex returns "); 233 print_token(stderr, t); 234 fprintf(stderr, "\n"); 235 } 236 # endif 237 return t; 238 } 239 240 /* 241 * Put the given string before the current character 242 * in the current line, thus inserting it into the input stream. 243 */ 244 245 public insertinput (s) 246 String s; 247 { 248 register char *p, *q; 249 int need, avail, shift; 250 251 q = s; 252 need = strlen(q); 253 avail = curchar - &scanner_linebuf[0]; 254 if (need <= avail) { 255 curchar = &scanner_linebuf[avail - need]; 256 p = curchar; 257 while (*q != '\0') { 258 *p++ = *q++; 259 } 260 } else { 261 p = curchar; 262 while (*p != '\0') { 263 ++p; 264 } 265 shift = need - avail; 266 if (p + shift >= &scanner_linebuf[MAXLINESIZE]) { 267 error("alias expansion too large"); 268 } 269 for (;;) { 270 *(p + shift) = *p; 271 if (p == curchar) { 272 break; 273 } 274 --p; 275 } 276 p = &scanner_linebuf[0]; 277 while (*q != '\0') { 278 *p++ = *q++; 279 } 280 curchar = &scanner_linebuf[0]; 281 } 282 } 283 284 /* 285 * Get the actuals for a macro call. 286 */ 287 288 private String movetochar (str, c) 289 String str; 290 char c; 291 { 292 register char *p; 293 294 while (*p != c) { 295 if (*p == '\0') { 296 error("missing ')' in macro call"); 297 } else if (*p == ')') { 298 error("not enough parameters in macro call"); 299 } else if (*p == ',') { 300 error("too many parameters in macro call"); 301 } 302 ++p; 303 } 304 return p; 305 } 306 307 private String *getactuals (n) 308 integer n; 309 { 310 String *a; 311 register char *p; 312 int i; 313 314 a = newarr(String, n); 315 p = curchar; 316 while (*p != '(') { 317 if (lexclass[*p] != WHITE) { 318 error("missing actuals for macro"); 319 } 320 ++p; 321 } 322 ++p; 323 for (i = 0; i < n - 1; i++) { 324 a[i] = p; 325 p = movetochar(p, ','); 326 *p = '\0'; 327 ++p; 328 } 329 a[n-1] = p; 330 p = movetochar(p, ')'); 331 *p = '\0'; 332 curchar = p + 1; 333 return a; 334 } 335 336 /* 337 * Do command macro expansion, assuming curchar points to the beginning 338 * of the actuals, and we are not in shell mode. 339 */ 340 341 private expand (pl, str) 342 List pl; 343 String str; 344 { 345 char buf[4096], namebuf[100]; 346 register char *p, *q, *r; 347 String *actual; 348 Name n; 349 integer i; 350 boolean match; 351 352 if (pl == nil) { 353 insertinput(str); 354 } else { 355 actual = getactuals(list_size(pl)); 356 p = buf; 357 q = str; 358 while (*q != '\0') { 359 if (p >= &buf[4096]) { 360 error("alias expansion too large"); 361 } 362 if (lexclass[*q] == ALPHA) { 363 r = namebuf; 364 do { 365 *r++ = *q++; 366 } while (isalnum(*q)); 367 *r = '\0'; 368 i = 0; 369 match = false; 370 foreach(Name, n, pl) 371 if (streq(ident(n), namebuf)) { 372 match = true; 373 break; 374 } 375 ++i; 376 endfor 377 if (match) { 378 r = actual[i]; 379 } else { 380 r = namebuf; 381 } 382 while (*r != '\0') { 383 *p++ = *r++; 384 } 385 } else { 386 *p++ = *q++; 387 } 388 } 389 *p = '\0'; 390 insertinput(buf); 391 } 392 } 393 394 /* 395 * Parser error handling. 396 */ 397 398 public yyerror(s) 399 String s; 400 { 401 register char *p; 402 register integer start; 403 404 if (streq(s, "syntax error")) { 405 beginerrmsg(); 406 p = prevchar; 407 start = p - &scanner_linebuf[0]; 408 if (p > &scanner_linebuf[0]) { 409 while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) { 410 --p; 411 } 412 } 413 fprintf(stderr, "%s", scanner_linebuf); 414 if (start != 0) { 415 fprintf(stderr, "%*c", start, ' '); 416 } 417 if (p == &scanner_linebuf[0]) { 418 fprintf(stderr, "^ unrecognized command"); 419 } else { 420 fprintf(stderr, "^ syntax error"); 421 } 422 enderrmsg(); 423 } else { 424 error(s); 425 } 426 } 427 428 /* 429 * Eat the current line. 430 */ 431 432 public gobble () 433 { 434 curchar = scanner_linebuf; 435 scanner_linebuf[0] = '\0'; 436 } 437 438 /* 439 * Scan an identifier. 440 * 441 * If chkalias is true, check first to see if it's an alias. 442 * Otherwise, check to see if it's a keyword. 443 */ 444 445 private Token getident (chkalias) 446 boolean chkalias; 447 { 448 char buf[1024]; 449 register char *p, *q; 450 register Token t; 451 List pl; 452 String str; 453 454 p = curchar; 455 q = buf; 456 if (shellmode) { 457 do { 458 *q++ = *p++; 459 } while (index(" \t\n!&<>*[]()'\"", *p) == nil); 460 } else { 461 do { 462 *q++ = *p++; 463 } while (isalnum(*p)); 464 } 465 curchar = p; 466 *q = '\0'; 467 yylval.y_name = identname(buf, false); 468 if (chkalias) { 469 if (findalias(yylval.y_name, &pl, &str)) { 470 expand(pl, str); 471 while (lexclass[*curchar] == WHITE) { 472 ++curchar; 473 } 474 if (pl == nil) { 475 t = getident(false); 476 } else { 477 t = getident(true); 478 } 479 } else if (shellmode) { 480 t = NAME; 481 } else { 482 t = findkeyword(yylval.y_name, NAME); 483 } 484 } else if (shellmode) { 485 t = NAME; 486 } else { 487 t = findkeyword(yylval.y_name, NAME); 488 } 489 return t; 490 } 491 492 /* 493 * Scan a number. 494 */ 495 496 private Token getnum() 497 { 498 char buf[1024]; 499 register Char *p, *q; 500 register Token t; 501 Integer base; 502 503 p = curchar; 504 q = buf; 505 if (*p == '0') { 506 if (*(p+1) == 'x') { 507 p += 2; 508 base = 16; 509 } else if (*(p+1) == 't') { 510 base = 10; 511 } else if (varIsSet("$hexin")) { 512 base = 16; 513 } else { 514 base = 8; 515 } 516 } else if (varIsSet("$hexin")) { 517 base = 16; 518 } else if (varIsSet("$octin")) { 519 base = 8; 520 } else { 521 base = 10; 522 } 523 if (base == 16) { 524 do { 525 *q++ = *p++; 526 } while (ishexdigit(*p)); 527 } else { 528 do { 529 *q++ = *p++; 530 } while (isdigit(*p)); 531 } 532 if (*p == '.') { 533 do { 534 *q++ = *p++; 535 } while (isdigit(*p)); 536 if (*p == 'e' or *p == 'E') { 537 p++; 538 if (*p == '+' or *p == '-' or isdigit(*p)) { 539 *q++ = 'e'; 540 do { 541 *q++ = *p++; 542 } while (isdigit(*p)); 543 } 544 } 545 *q = '\0'; 546 yylval.y_real = atof(buf); 547 t = REAL; 548 } else { 549 *q = '\0'; 550 switch (base) { 551 case 10: 552 yylval.y_int = atol(buf); 553 break; 554 555 case 8: 556 yylval.y_int = octal(buf); 557 break; 558 559 case 16: 560 yylval.y_int = hex(buf); 561 break; 562 563 default: 564 badcaseval(base); 565 } 566 t = INT; 567 } 568 curchar = p; 569 return t; 570 } 571 572 /* 573 * Convert a string of octal digits to an integer. 574 */ 575 576 private int octal(s) 577 String s; 578 { 579 register Char *p; 580 register Integer n; 581 582 n = 0; 583 for (p = s; *p != '\0'; p++) { 584 n = 8*n + (*p - '0'); 585 } 586 return n; 587 } 588 589 /* 590 * Convert a string of hexadecimal digits to an integer. 591 */ 592 593 private int hex(s) 594 String s; 595 { 596 register Char *p; 597 register Integer n; 598 599 n = 0; 600 for (p = s; *p != '\0'; p++) { 601 n *= 16; 602 if (*p >= 'a' and *p <= 'f') { 603 n += (*p - 'a' + 10); 604 } else if (*p >= 'A' and *p <= 'F') { 605 n += (*p - 'A' + 10); 606 } else { 607 n += (*p - '0'); 608 } 609 } 610 return n; 611 } 612 613 /* 614 * Scan a string. 615 */ 616 617 private Token getstring (quote) 618 char quote; 619 { 620 register char *p, *q; 621 char buf[MAXLINESIZE]; 622 boolean endofstring; 623 Token t; 624 625 p = curchar; 626 q = buf; 627 endofstring = false; 628 while (not endofstring) { 629 if (*p == '\\' and *(p+1) == '\n') { 630 if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) { 631 error("non-terminated string"); 632 } 633 p = &scanner_linebuf[0] - 1; 634 } else if (*p == '\n' or *p == '\0') { 635 error("non-terminated string"); 636 endofstring = true; 637 } else if (*p == quote) { 638 endofstring = true; 639 } else { 640 curchar = p; 641 *q++ = charcon(p); 642 p = curchar; 643 } 644 p++; 645 } 646 curchar = p; 647 *q = '\0'; 648 if (quote == '\'' and buf[1] == '\0') { 649 yylval.y_char = buf[0]; 650 t = CHAR; 651 } else { 652 yylval.y_string = strdup(buf); 653 t = STRING; 654 } 655 return t; 656 } 657 658 /* 659 * Process a character constant. 660 * Watch out for backslashes. 661 */ 662 663 private char charcon (s) 664 String s; 665 { 666 register char *p, *q; 667 char c, buf[10]; 668 669 p = s; 670 if (*p == '\\') { 671 ++p; 672 switch (*p) { 673 case '\\': 674 c = '\\'; 675 break; 676 677 case 'n': 678 c = '\n'; 679 break; 680 681 case 'r': 682 c = '\r'; 683 break; 684 685 case 't': 686 c = '\t'; 687 break; 688 689 case '\'': 690 case '"': 691 c = *p; 692 break; 693 694 default: 695 if (isdigit(*p)) { 696 q = buf; 697 do { 698 *q++ = *p++; 699 } while (isdigit(*p)); 700 *q = '\0'; 701 c = (char) octal(buf); 702 } 703 --p; 704 break; 705 } 706 curchar = p; 707 } else { 708 c = *p; 709 } 710 return c; 711 } 712 713 /* 714 * Input file management routines. 715 */ 716 717 public setinput(filename) 718 Filename filename; 719 { 720 File f; 721 722 f = fopen(filename, "r"); 723 if (f == nil) { 724 error("can't open %s", filename); 725 } else { 726 if (curinclindex >= MAXINCLDEPTH) { 727 error("unreasonable input nesting on \"%s\"", filename); 728 } 729 inclinfo[curinclindex].savefile = in; 730 inclinfo[curinclindex].savefn = errfilename; 731 inclinfo[curinclindex].savelineno = errlineno; 732 curinclindex++; 733 in = f; 734 errfilename = filename; 735 errlineno = 1; 736 } 737 } 738 739 private Boolean eofinput() 740 { 741 register Boolean b; 742 743 if (curinclindex == 0) { 744 if (isterm(in)) { 745 putchar('\n'); 746 clearerr(in); 747 b = false; 748 } else { 749 b = true; 750 } 751 } else { 752 fclose(in); 753 --curinclindex; 754 in = inclinfo[curinclindex].savefile; 755 errfilename = inclinfo[curinclindex].savefn; 756 errlineno = inclinfo[curinclindex].savelineno; 757 b = false; 758 } 759 return b; 760 } 761 762 /* 763 * Pop the current input. Return whether successful. 764 */ 765 766 public Boolean popinput() 767 { 768 Boolean b; 769 770 if (curinclindex == 0) { 771 b = false; 772 } else { 773 b = (Boolean) (not eofinput()); 774 } 775 return b; 776 } 777 778 /* 779 * Return whether we are currently reading from standard input. 780 */ 781 782 public Boolean isstdin() 783 { 784 return (Boolean) (in == stdin); 785 } 786 787 /* 788 * Send the current line to the shell. 789 */ 790 791 public shellline() 792 { 793 register char *p; 794 795 p = curchar; 796 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 797 ++p; 798 } 799 shell(p); 800 if (*p == '\0' and isterm(in)) { 801 putchar('\n'); 802 } 803 erecover(); 804 } 805 806 /* 807 * Read the rest of the current line in "shell mode". 808 */ 809 810 public beginshellmode() 811 { 812 shellmode = true; 813 } 814 815 /* 816 * Print out a token for debugging. 817 */ 818 819 public print_token(f, t) 820 File f; 821 Token t; 822 { 823 if (t == '\n') { 824 fprintf(f, "char '\\n'"); 825 } else if (t == EOF) { 826 fprintf(f, "EOF"); 827 } else if (t < 256) { 828 fprintf(f, "char '%c'", t); 829 } else { 830 fprintf(f, "\"%s\"", keywdstring(t)); 831 } 832 } 833