1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.8 8/5/83"; 4 5 static char rcsid[] = "$Header: scanner.c,v 1.3 84/03/27 10:23:50 linton Exp $"; 6 7 /* 8 * Debugger scanner. 9 */ 10 11 #include "defs.h" 12 #include "scanner.h" 13 #include "main.h" 14 #include "keywords.h" 15 #include "tree.h" 16 #include "symbols.h" 17 #include "names.h" 18 #include "y.tab.h" 19 20 #ifndef public 21 typedef int Token; 22 #endif 23 24 public String initfile = ".dbxinit"; 25 26 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 27 28 private Charclass class[256 + 1]; 29 private Charclass *lexclass = class + 1; 30 31 #define isdigit(c) (lexclass[c] == NUM) 32 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 33 #define ishexdigit(c) ( \ 34 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 35 ) 36 37 #define MAXLINESIZE 1024 38 39 private File in; 40 private Char linebuf[MAXLINESIZE]; 41 private Char *curchar, *prevchar; 42 43 #define MAXINCLDEPTH 10 44 45 private struct { 46 File savefile; 47 Filename savefn; 48 int savelineno; 49 } inclinfo[MAXINCLDEPTH]; 50 51 private unsigned int curinclindex; 52 53 private Token getident(); 54 private Token getnum(); 55 private Token getstring(); 56 private Boolean eofinput(); 57 private Char charcon(); 58 private Char charlookup(); 59 60 private enterlexclass(class, s) 61 Charclass class; 62 String s; 63 { 64 register char *p; 65 66 for (p = s; *p != '\0'; p++) { 67 lexclass[*p] = class; 68 } 69 } 70 71 public scanner_init() 72 { 73 register Integer i; 74 75 for (i = 0; i < 257; i++) { 76 class[i] = OTHER; 77 } 78 enterlexclass(WHITE, " \t"); 79 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 80 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 81 enterlexclass(NUM, "0123456789"); 82 in = stdin; 83 errfilename = nil; 84 errlineno = 0; 85 curchar = linebuf; 86 linebuf[0] = '\0'; 87 } 88 89 /* 90 * Read a single token. 91 * 92 * Input is line buffered. 93 * 94 * There are two "modes" of operation: one as in a compiler, 95 * and one for reading shell-like syntax. 96 */ 97 98 private Boolean shellmode; 99 100 public Token yylex() 101 { 102 register int c; 103 register char *p; 104 register Token t; 105 String line; 106 107 p = curchar; 108 if (*p == '\0') { 109 do { 110 if (isterm(in)) { 111 printf("(%s) ", cmdname); 112 fflush(stdout); 113 } 114 line = fgets(linebuf, MAXLINESIZE, in); 115 } while (line == nil and not eofinput()); 116 if (line == nil) { 117 c = EOF; 118 } else { 119 p = linebuf; 120 while (lexclass[*p] == WHITE) { 121 p++; 122 } 123 shellmode = false; 124 } 125 } else { 126 while (lexclass[*p] == WHITE) { 127 p++; 128 } 129 } 130 curchar = p; 131 prevchar = curchar; 132 c = *p; 133 if (lexclass[c] == ALPHA) { 134 t = getident(); 135 } else if (lexclass[c] == NUM) { 136 if (shellmode) { 137 t = getident(); 138 } else { 139 t = getnum(); 140 } 141 } else { 142 ++curchar; 143 switch (c) { 144 case '\n': 145 t = '\n'; 146 if (errlineno != 0) { 147 errlineno++; 148 } 149 break; 150 151 case '"': 152 case '\'': 153 t = getstring(); 154 break; 155 156 case '.': 157 if (shellmode) { 158 --curchar; 159 t = getident(); 160 } else if (isdigit(*curchar)) { 161 --curchar; 162 t = getnum(); 163 } else { 164 t = '.'; 165 } 166 break; 167 168 case '<': 169 if (not shellmode and *curchar == '<') { 170 ++curchar; 171 t = LFORMER; 172 } else { 173 t = '<'; 174 } 175 break; 176 177 case '>': 178 if (not shellmode and *curchar == '>') { 179 ++curchar; 180 t = RFORMER; 181 } else { 182 t = '>'; 183 } 184 break; 185 186 case '#': 187 if (*curchar == '^') { 188 ++curchar; 189 t = ABSTRACTION; 190 } else { 191 t = '#'; 192 } 193 break; 194 195 case '-': 196 if (shellmode) { 197 --curchar; 198 t = getident(); 199 } else if (*curchar == '>') { 200 ++curchar; 201 t = ARROW; 202 } else { 203 t = '-'; 204 } 205 break; 206 207 case EOF: 208 t = 0; 209 break; 210 211 default: 212 if (shellmode and index("!&*()[]", c) == nil) { 213 --curchar; 214 t = getident(); 215 } else { 216 t = c; 217 } 218 break; 219 } 220 } 221 # ifdef LEXDEBUG 222 if (lexdebug) { 223 fprintf(stderr, "yylex returns "); 224 print_token(stderr, t); 225 fprintf(stderr, "\n"); 226 } 227 # endif 228 return t; 229 } 230 231 /* 232 * Parser error handling. 233 */ 234 235 public yyerror(s) 236 String s; 237 { 238 register char *p; 239 register integer start; 240 241 if (streq(s, "syntax error")) { 242 beginerrmsg(); 243 p = prevchar; 244 start = p - &linebuf[0]; 245 if (p > &linebuf[0]) { 246 while (lexclass[*p] == WHITE and p > &linebuf[0]) { 247 --p; 248 } 249 } 250 fprintf(stderr, "%s", linebuf); 251 if (start != 0) { 252 fprintf(stderr, "%*c", start, ' '); 253 } 254 if (p == &linebuf[0]) { 255 fprintf(stderr, "^ unrecognized command"); 256 } else { 257 fprintf(stderr, "^ syntax error"); 258 } 259 enderrmsg(); 260 } else { 261 error(s); 262 } 263 } 264 265 /* 266 * Eat the current line. 267 */ 268 269 public gobble() 270 { 271 curchar = linebuf; 272 linebuf[0] = '\0'; 273 } 274 275 /* 276 * Scan an identifier and check to see if it's a keyword. 277 */ 278 279 private Token getident() 280 { 281 char buf[256]; 282 register Char *p, *q; 283 register Token t; 284 285 p = curchar; 286 q = buf; 287 if (shellmode) { 288 do { 289 *q++ = *p++; 290 } while (index(" \t\n!&<>*[]()'\"", *p) == nil); 291 } else { 292 do { 293 *q++ = *p++; 294 } while (isalnum(*p)); 295 } 296 curchar = p; 297 *q = '\0'; 298 yylval.y_name = identname(buf, false); 299 if (not shellmode) { 300 t = findkeyword(yylval.y_name); 301 if (t == nil) { 302 t = NAME; 303 } 304 } else { 305 t = NAME; 306 } 307 return t; 308 } 309 310 /* 311 * Scan a number. 312 */ 313 314 private Token getnum() 315 { 316 char buf[256]; 317 register Char *p, *q; 318 register Token t; 319 Integer base; 320 321 p = curchar; 322 q = buf; 323 if (*p == '0') { 324 if (*(p+1) == 'x') { 325 p += 2; 326 base = 16; 327 } else { 328 base = 8; 329 } 330 } else { 331 base = 10; 332 } 333 if (base == 16) { 334 do { 335 *q++ = *p++; 336 } while (ishexdigit(*p)); 337 } else { 338 do { 339 *q++ = *p++; 340 } while (isdigit(*p)); 341 } 342 if (*p == '.') { 343 do { 344 *q++ = *p++; 345 } while (isdigit(*p)); 346 if (*p == 'e' or *p == 'E') { 347 p++; 348 if (*p == '+' or *p == '-' or isdigit(*p)) { 349 *q++ = 'e'; 350 do { 351 *q++ = *p++; 352 } while (isdigit(*p)); 353 } 354 } 355 *q = '\0'; 356 yylval.y_real = atof(buf); 357 t = REAL; 358 } else { 359 *q = '\0'; 360 switch (base) { 361 case 10: 362 yylval.y_int = atol(buf); 363 break; 364 365 case 8: 366 yylval.y_int = octal(buf); 367 break; 368 369 case 16: 370 yylval.y_int = hex(buf); 371 break; 372 373 default: 374 badcaseval(base); 375 } 376 t = INT; 377 } 378 curchar = p; 379 return t; 380 } 381 382 /* 383 * Convert a string of octal digits to an integer. 384 */ 385 386 private int octal(s) 387 String s; 388 { 389 register Char *p; 390 register Integer n; 391 392 n = 0; 393 for (p = s; *p != '\0'; p++) { 394 n = 8*n + (*p - '0'); 395 } 396 return n; 397 } 398 399 /* 400 * Convert a string of hexadecimal digits to an integer. 401 */ 402 403 private int hex(s) 404 String s; 405 { 406 register Char *p; 407 register Integer n; 408 409 n = 0; 410 for (p = s; *p != '\0'; p++) { 411 n *= 16; 412 if (*p >= 'a' and *p <= 'f') { 413 n += (*p - 'a' + 10); 414 } else if (*p >= 'A' and *p <= 'F') { 415 n += (*p - 'A' + 10); 416 } else { 417 n += (*p - '0'); 418 } 419 } 420 return n; 421 } 422 423 /* 424 * Scan a string. 425 */ 426 427 private Token getstring() 428 { 429 char buf[256]; 430 register Char *p, *q; 431 Boolean endofstring; 432 433 p = curchar; 434 q = buf; 435 endofstring = false; 436 while (not endofstring) { 437 if (*p == '\n' or *p == '\0') { 438 error("non-terminated string"); 439 endofstring = true; 440 } else if (*p == '"' or *p == '\'') { 441 if (*(p+1) != *p) { 442 endofstring = true; 443 } else { 444 *q++ = *p; 445 } 446 } else { 447 curchar = p; 448 *q++ = charcon(p); 449 p = curchar; 450 } 451 p++; 452 } 453 curchar = p; 454 *q = '\0'; 455 yylval.y_string = strdup(buf); 456 return STRING; 457 } 458 459 /* 460 * Process a character constant. 461 * Watch out for backslashes. 462 */ 463 464 private Char charcon(p) 465 char *p; 466 { 467 char c, buf[10], *q; 468 469 if (*p == '\\') { 470 ++p; 471 if (*p != '\\') { 472 q = buf; 473 do { 474 *q++ = *p++; 475 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 476 *q = '\0'; 477 if (isdigit(buf[0])) { 478 c = (Char) octal(buf); 479 } else { 480 c = charlookup(buf); 481 } 482 curchar = p - 1; 483 } else { 484 c = '\\'; 485 } 486 } else { 487 c = *p; 488 } 489 return c; 490 } 491 492 /* 493 * Do a lookup for a ASCII character name. 494 */ 495 496 private String ascii[] = { 497 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 498 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 499 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 500 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 501 "SP", nil 502 }; 503 504 private char charlookup(s) 505 String s; 506 { 507 register int i; 508 509 for (i = 0; ascii[i] != NULL; i++) { 510 if (streq(s, ascii[i])) { 511 return i; 512 } 513 } 514 if (streq(s, "DEL")) { 515 return 0177; 516 } 517 error("unknown ascii name \"%s\"", s); 518 return '?'; 519 } 520 521 /* 522 * Input file management routines. 523 */ 524 525 public setinput(filename) 526 Filename filename; 527 { 528 File f; 529 530 f = fopen(filename, "r"); 531 if (f == nil) { 532 error("can't open %s", filename); 533 } else { 534 if (curinclindex >= MAXINCLDEPTH) { 535 error("unreasonable input nesting on \"%s\"", filename); 536 } 537 inclinfo[curinclindex].savefile = in; 538 inclinfo[curinclindex].savefn = errfilename; 539 inclinfo[curinclindex].savelineno = errlineno; 540 curinclindex++; 541 in = f; 542 errfilename = filename; 543 errlineno = 1; 544 } 545 } 546 547 private Boolean eofinput() 548 { 549 register Boolean b; 550 551 if (curinclindex == 0) { 552 if (isterm(in)) { 553 putchar('\n'); 554 clearerr(in); 555 b = false; 556 } else { 557 b = true; 558 } 559 } else { 560 fclose(in); 561 --curinclindex; 562 in = inclinfo[curinclindex].savefile; 563 errfilename = inclinfo[curinclindex].savefn; 564 errlineno = inclinfo[curinclindex].savelineno; 565 b = false; 566 } 567 return b; 568 } 569 570 /* 571 * Pop the current input. Return whether successful. 572 */ 573 574 public Boolean popinput() 575 { 576 Boolean b; 577 578 if (curinclindex == 0) { 579 b = false; 580 } else { 581 b = (Boolean) (not eofinput()); 582 } 583 return b; 584 } 585 586 /* 587 * Return whether we are currently reading from standard input. 588 */ 589 590 public Boolean isstdin() 591 { 592 return (Boolean) (in == stdin); 593 } 594 595 /* 596 * Send the current line to the shell. 597 */ 598 599 public shellline() 600 { 601 register char *p; 602 603 p = curchar; 604 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 605 ++p; 606 } 607 shell(p); 608 if (*p == '\0' and isterm(in)) { 609 putchar('\n'); 610 } 611 erecover(); 612 } 613 614 /* 615 * Read the rest of the current line in "shell mode". 616 */ 617 618 public beginshellmode() 619 { 620 shellmode = true; 621 } 622 623 /* 624 * Print out a token for debugging. 625 */ 626 627 public print_token(f, t) 628 File f; 629 Token t; 630 { 631 if (t == '\n') { 632 fprintf(f, "char '\\n'"); 633 } else if (t == EOF) { 634 fprintf(f, "EOF"); 635 } else if (t < 256) { 636 fprintf(f, "char '%c'", t); 637 } else { 638 fprintf(f, "\"%s\"", keywdstring(t)); 639 } 640 } 641