1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.6 04/29/83"; 4 5 /* 6 * Debugger scanner. 7 */ 8 9 #include "defs.h" 10 #include "scanner.h" 11 #include "main.h" 12 #include "keywords.h" 13 #include "tree.h" 14 #include "symbols.h" 15 #include "names.h" 16 #include "y.tab.h" 17 18 #ifndef public 19 typedef int Token; 20 #endif 21 22 public String initfile = ".dbxinit"; 23 24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 25 26 private Charclass class[256 + 1]; 27 private Charclass *lexclass = class + 1; 28 29 #define isdigit(c) (lexclass[c] == NUM) 30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 31 #define ishexdigit(c) ( \ 32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 33 ) 34 35 #define MAXLINESIZE 1024 36 37 private File in; 38 private Char linebuf[MAXLINESIZE]; 39 private Char *curchar; 40 41 #define MAXINCLDEPTH 10 42 43 private struct { 44 File savefile; 45 Filename savefn; 46 int savelineno; 47 } inclinfo[MAXINCLDEPTH]; 48 49 private unsigned int curinclindex; 50 51 private Token getident(); 52 private Token getnum(); 53 private Token getstring(); 54 private Boolean eofinput(); 55 private Char charcon(); 56 private Char charlookup(); 57 58 private enterlexclass(class, s) 59 Charclass class; 60 String s; 61 { 62 register char *p; 63 64 for (p = s; *p != '\0'; p++) { 65 lexclass[*p] = class; 66 } 67 } 68 69 public scanner_init() 70 { 71 register Integer i; 72 73 for (i = 0; i < 257; i++) { 74 class[i] = OTHER; 75 } 76 enterlexclass(WHITE, " \t"); 77 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 78 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 79 enterlexclass(NUM, "0123456789"); 80 in = stdin; 81 errfilename = nil; 82 errlineno = 0; 83 curchar = linebuf; 84 linebuf[0] = '\0'; 85 } 86 87 /* 88 * Read a single token. 89 * 90 * Input is line buffered. 91 * 92 * There are two "modes" of operation: one as in a compiler, 93 * and one for reading shell-like syntax. 94 */ 95 96 private Boolean shellmode; 97 98 public Token yylex() 99 { 100 register int c; 101 register char *p; 102 register Token t; 103 String line; 104 105 p = curchar; 106 if (*p == '\0') { 107 do { 108 if (isterm(in)) { 109 printf("> "); 110 fflush(stdout); 111 } 112 line = fgets(linebuf, MAXLINESIZE, in); 113 } while (line == nil and not eofinput()); 114 if (line == nil) { 115 c = EOF; 116 } else { 117 p = linebuf; 118 while (lexclass[*p] == WHITE) { 119 p++; 120 } 121 shellmode = false; 122 } 123 } else { 124 while (lexclass[*p] == WHITE) { 125 p++; 126 } 127 } 128 curchar = p; 129 c = *p; 130 if (lexclass[c] == ALPHA) { 131 t = getident(); 132 } else if (lexclass[c] == NUM) { 133 if (shellmode) { 134 t = getident(); 135 } else { 136 t = getnum(); 137 } 138 } else { 139 ++curchar; 140 switch (c) { 141 case '\n': 142 t = '\n'; 143 if (errlineno != 0) { 144 errlineno++; 145 } 146 break; 147 148 case '"': 149 case '\'': 150 t = getstring(); 151 break; 152 153 case '.': 154 if (shellmode) { 155 --curchar; 156 t = getident(); 157 } else if (isdigit(*curchar)) { 158 --curchar; 159 t = getnum(); 160 } else { 161 t = '.'; 162 } 163 break; 164 165 case '<': 166 if (not shellmode and *curchar == '<') { 167 ++curchar; 168 t = LFORMER; 169 } else { 170 t = '<'; 171 } 172 break; 173 174 case '>': 175 if (not shellmode and *curchar == '>') { 176 ++curchar; 177 t = RFORMER; 178 } else { 179 t = '>'; 180 } 181 break; 182 183 case '#': 184 if (*curchar == '^') { 185 ++curchar; 186 t = ABSTRACTION; 187 } else { 188 t = '#'; 189 } 190 break; 191 192 case '-': 193 if (shellmode) { 194 --curchar; 195 t = getident(); 196 } else if (*curchar == '>') { 197 ++curchar; 198 t = ARROW; 199 } else { 200 t = '-'; 201 } 202 break; 203 204 case EOF: 205 t = 0; 206 break; 207 208 default: 209 if (shellmode and index("!&*()[]", c) == nil) { 210 --curchar; 211 t = getident(); 212 } else { 213 t = c; 214 } 215 break; 216 } 217 } 218 # ifdef LEXDEBUG 219 if (lexdebug) { 220 fprintf(stderr, "yylex returns "); 221 print_token(stderr, t); 222 fprintf(stderr, "\n"); 223 } 224 # endif 225 return t; 226 } 227 228 /* 229 * Parser error handling. 230 */ 231 232 public yyerror(s) 233 String s; 234 { 235 register Char *p, *tokenbegin, *tokenend; 236 register Integer len; 237 238 if (streq(s, "syntax error")) { 239 beginerrmsg(); 240 tokenend = curchar - 1; 241 tokenbegin = tokenend; 242 while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) { 243 --tokenbegin; 244 } 245 len = tokenend - tokenbegin + 1; 246 p = tokenbegin; 247 if (p > &linebuf[0]) { 248 while (lexclass[*p] == WHITE and p > &linebuf[0]) { 249 --p; 250 } 251 } 252 if (p == &linebuf[0]) { 253 fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin); 254 } else { 255 fprintf(stderr, "syntax error"); 256 if (len != 0) { 257 fprintf(stderr, " on \"%.*s\"", len, tokenbegin); 258 } 259 } 260 enderrmsg(); 261 } else { 262 error(s); 263 } 264 } 265 266 /* 267 * Eat the current line. 268 */ 269 270 public gobble() 271 { 272 curchar = linebuf; 273 linebuf[0] = '\0'; 274 } 275 276 /* 277 * Scan an identifier and check to see if it's a keyword. 278 */ 279 280 private Token getident() 281 { 282 char buf[256]; 283 register Char *p, *q; 284 register Token t; 285 286 p = curchar; 287 q = buf; 288 if (shellmode) { 289 do { 290 *q++ = *p++; 291 } while (index(" \t\n!&<>*[]()", *p) == nil); 292 } else { 293 do { 294 *q++ = *p++; 295 } while (isalnum(*p)); 296 } 297 curchar = p; 298 *q = '\0'; 299 yylval.y_name = identname(buf, false); 300 if (not shellmode) { 301 t = findkeyword(yylval.y_name); 302 if (t == nil) { 303 t = NAME; 304 } 305 } else { 306 t = NAME; 307 } 308 return t; 309 } 310 311 /* 312 * Scan a number. 313 */ 314 315 private Token getnum() 316 { 317 char buf[256]; 318 register Char *p, *q; 319 register Token t; 320 Integer base; 321 322 p = curchar; 323 q = buf; 324 if (*p == '0') { 325 if (*(p+1) == 'x') { 326 p += 2; 327 base = 16; 328 } else { 329 base = 8; 330 } 331 } else { 332 base = 10; 333 } 334 if (base == 16) { 335 do { 336 *q++ = *p++; 337 } while (ishexdigit(*p)); 338 } else { 339 do { 340 *q++ = *p++; 341 } while (isdigit(*p)); 342 } 343 if (*p == '.') { 344 do { 345 *q++ = *p++; 346 } while (isdigit(*p)); 347 if (*p == 'e' or *p == 'E') { 348 p++; 349 if (*p == '+' or *p == '-' or isdigit(*p)) { 350 *q++ = 'e'; 351 do { 352 *q++ = *p++; 353 } while (isdigit(*p)); 354 } 355 } 356 *q = '\0'; 357 yylval.y_real = atof(buf); 358 t = REAL; 359 } else { 360 *q = '\0'; 361 switch (base) { 362 case 10: 363 yylval.y_int = atol(buf); 364 break; 365 366 case 8: 367 yylval.y_int = octal(buf); 368 break; 369 370 case 16: 371 yylval.y_int = hex(buf); 372 break; 373 374 default: 375 badcaseval(base); 376 } 377 t = INT; 378 } 379 curchar = p; 380 return t; 381 } 382 383 /* 384 * Convert a string of octal digits to an integer. 385 */ 386 387 private int octal(s) 388 String s; 389 { 390 register Char *p; 391 register Integer n; 392 393 n = 0; 394 for (p = s; *p != '\0'; p++) { 395 n = 8*n + (*p - '0'); 396 } 397 return n; 398 } 399 400 /* 401 * Convert a string of hexadecimal digits to an integer. 402 */ 403 404 private int hex(s) 405 String s; 406 { 407 register Char *p; 408 register Integer n; 409 410 n = 0; 411 for (p = s; *p != '\0'; p++) { 412 n *= 16; 413 if (*p >= 'a' and *p <= 'f') { 414 n += (*p - 'a' + 10); 415 } else if (*p >= 'A' and *p <= 'F') { 416 n += (*p - 'A' + 10); 417 } else { 418 n += (*p - '0'); 419 } 420 } 421 return n; 422 } 423 424 /* 425 * Scan a string. 426 */ 427 428 private Token getstring() 429 { 430 char buf[256]; 431 register Char *p, *q; 432 Boolean endofstring; 433 434 p = curchar; 435 q = buf; 436 endofstring = false; 437 while (not endofstring) { 438 if (*p == '\n' or *p == '\0') { 439 error("non-terminated string"); 440 endofstring = true; 441 } else if (*p == '"' or *p == '\'') { 442 if (*(p+1) != *p) { 443 endofstring = true; 444 } else { 445 *q++ = *p; 446 } 447 } else { 448 *q++ = charcon(p); 449 p = curchar; 450 } 451 p++; 452 } 453 curchar = p; 454 *q = '\0'; 455 yylval.y_string = strdup(buf); 456 return STRING; 457 } 458 459 /* 460 * Process a character constant. 461 * Watch out for backslashes. 462 */ 463 464 private Char charcon(p) 465 char *p; 466 { 467 char c, buf[10], *q; 468 469 if (*p == '\\') { 470 ++p; 471 if (*p != '\\') { 472 q = buf; 473 do { 474 *q++ = *p++; 475 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 476 *q = '\0'; 477 if (isdigit(buf[0])) { 478 c = (Char) octal(buf); 479 } else { 480 c = charlookup(buf); 481 } 482 curchar = p - 1; 483 } else { 484 c = '\\'; 485 } 486 } else { 487 c = *p; 488 } 489 return c; 490 } 491 492 /* 493 * Do a lookup for a ASCII character name. 494 */ 495 496 private String ascii[] = { 497 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 498 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 499 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 500 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 501 "SP", nil 502 }; 503 504 private char charlookup(s) 505 String s; 506 { 507 register int i; 508 509 for (i = 0; ascii[i] != NULL; i++) { 510 if (streq(s, ascii[i])) { 511 return i; 512 } 513 } 514 if (streq(s, "DEL")) { 515 return 0177; 516 } 517 error("unknown ascii name \"%s\"", s); 518 return '?'; 519 } 520 521 /* 522 * Input file management routines. 523 */ 524 525 public setinput(filename) 526 Filename filename; 527 { 528 File f; 529 530 f = fopen(filename, "r"); 531 if (f == nil) { 532 error("can't open %s", filename); 533 } else { 534 if (curinclindex >= MAXINCLDEPTH) { 535 error("unreasonable input nesting on \"%s\"", filename); 536 } 537 inclinfo[curinclindex].savefile = in; 538 inclinfo[curinclindex].savefn = errfilename; 539 inclinfo[curinclindex].savelineno = errlineno; 540 curinclindex++; 541 in = f; 542 errfilename = filename; 543 errlineno = 1; 544 } 545 } 546 547 private Boolean eofinput() 548 { 549 register Boolean b; 550 551 if (curinclindex == 0) { 552 if (isterm(in)) { 553 putchar('\n'); 554 b = false; 555 } else { 556 b = true; 557 } 558 } else { 559 fclose(in); 560 --curinclindex; 561 in = inclinfo[curinclindex].savefile; 562 errfilename = inclinfo[curinclindex].savefn; 563 errlineno = inclinfo[curinclindex].savelineno; 564 b = false; 565 } 566 return b; 567 } 568 569 /* 570 * Pop the current input. Return whether successful. 571 */ 572 573 public Boolean popinput() 574 { 575 Boolean b; 576 577 if (curinclindex == 0) { 578 b = false; 579 } else { 580 b = (Boolean) (not eofinput()); 581 } 582 return b; 583 } 584 585 /* 586 * Return whether we are currently reading from standard input. 587 */ 588 589 public Boolean isstdin() 590 { 591 return (Boolean) (in == stdin); 592 } 593 594 /* 595 * Send the current line to the shell. 596 */ 597 598 public shellline() 599 { 600 register char *p; 601 602 p = curchar; 603 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 604 ++p; 605 } 606 shell(p); 607 if (*p == '\0' and isterm(in)) { 608 putchar('\n'); 609 } 610 erecover(); 611 } 612 613 /* 614 * Read the rest of the current line in "shell mode". 615 */ 616 617 public beginshellmode() 618 { 619 shellmode = true; 620 } 621 622 /* 623 * Print out a token for debugging. 624 */ 625 626 public print_token(f, t) 627 File f; 628 Token t; 629 { 630 if (t == '\n') { 631 fprintf(f, "char '\\n'"); 632 } else if (t == EOF) { 633 fprintf(f, "EOF"); 634 } else if (t < 256) { 635 fprintf(f, "char '%c'", t); 636 } else { 637 fprintf(f, "\"%s\"", keywdstring(t)); 638 } 639 } 640