1 /* Copyright (c) 1982 Regents of the University of California */ 2 3 static char sccsid[] = "@(#)scanner.c 1.9 (Berkeley) 06/23/84"; 4 5 /* 6 * Debugger scanner. 7 */ 8 9 #include "defs.h" 10 #include "scanner.h" 11 #include "main.h" 12 #include "keywords.h" 13 #include "tree.h" 14 #include "symbols.h" 15 #include "names.h" 16 #include "y.tab.h" 17 18 #ifndef public 19 typedef int Token; 20 #endif 21 22 public String initfile = ".dbxinit"; 23 24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass; 25 26 private Charclass class[256 + 1]; 27 private Charclass *lexclass = class + 1; 28 29 #define isdigit(c) (lexclass[c] == NUM) 30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM) 31 #define ishexdigit(c) ( \ 32 isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \ 33 ) 34 35 #define MAXLINESIZE 1024 36 37 private File in; 38 private Char linebuf[MAXLINESIZE]; 39 private Char *curchar, *prevchar; 40 41 #define MAXINCLDEPTH 10 42 43 private struct { 44 File savefile; 45 Filename savefn; 46 int savelineno; 47 } inclinfo[MAXINCLDEPTH]; 48 49 private unsigned int curinclindex; 50 51 private Token getident(); 52 private Token getnum(); 53 private Token getstring(); 54 private Boolean eofinput(); 55 private Char charcon(); 56 private Char charlookup(); 57 58 private enterlexclass(class, s) 59 Charclass class; 60 String s; 61 { 62 register char *p; 63 64 for (p = s; *p != '\0'; p++) { 65 lexclass[*p] = class; 66 } 67 } 68 69 public scanner_init() 70 { 71 register Integer i; 72 73 for (i = 0; i < 257; i++) { 74 class[i] = OTHER; 75 } 76 enterlexclass(WHITE, " \t"); 77 enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz"); 78 enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"); 79 enterlexclass(NUM, "0123456789"); 80 in = stdin; 81 errfilename = nil; 82 errlineno = 0; 83 curchar = linebuf; 84 linebuf[0] = '\0'; 85 } 86 87 /* 88 * Read a single token. 89 * 90 * Input is line buffered. 91 * 92 * There are two "modes" of operation: one as in a compiler, 93 * and one for reading shell-like syntax. 94 */ 95 96 private Boolean shellmode; 97 98 public Token yylex() 99 { 100 register int c; 101 register char *p; 102 register Token t; 103 String line; 104 105 p = curchar; 106 if (*p == '\0') { 107 do { 108 if (isterm(in)) { 109 printf("(%s) ", cmdname); 110 fflush(stdout); 111 } 112 line = fgets(linebuf, MAXLINESIZE, in); 113 } while (line == nil and not eofinput()); 114 if (line == nil) { 115 c = EOF; 116 } else { 117 p = linebuf; 118 while (lexclass[*p] == WHITE) { 119 p++; 120 } 121 shellmode = false; 122 } 123 } else { 124 while (lexclass[*p] == WHITE) { 125 p++; 126 } 127 } 128 curchar = p; 129 prevchar = curchar; 130 c = *p; 131 if (lexclass[c] == ALPHA) { 132 t = getident(); 133 } else if (lexclass[c] == NUM) { 134 if (shellmode) { 135 t = getident(); 136 } else { 137 t = getnum(); 138 } 139 } else { 140 ++curchar; 141 switch (c) { 142 case '\n': 143 t = '\n'; 144 if (errlineno != 0) { 145 errlineno++; 146 } 147 break; 148 149 case '"': 150 case '\'': 151 t = getstring(); 152 break; 153 154 case '.': 155 if (shellmode) { 156 --curchar; 157 t = getident(); 158 } else if (isdigit(*curchar)) { 159 --curchar; 160 t = getnum(); 161 } else { 162 t = '.'; 163 } 164 break; 165 166 case '<': 167 if (not shellmode and *curchar == '<') { 168 ++curchar; 169 t = LFORMER; 170 } else { 171 t = '<'; 172 } 173 break; 174 175 case '>': 176 if (not shellmode and *curchar == '>') { 177 ++curchar; 178 t = RFORMER; 179 } else { 180 t = '>'; 181 } 182 break; 183 184 case '#': 185 if (*curchar == '^') { 186 ++curchar; 187 t = ABSTRACTION; 188 } else { 189 t = '#'; 190 } 191 break; 192 193 case '-': 194 if (shellmode) { 195 --curchar; 196 t = getident(); 197 } else if (*curchar == '>') { 198 ++curchar; 199 t = ARROW; 200 } else { 201 t = '-'; 202 } 203 break; 204 205 case EOF: 206 t = 0; 207 break; 208 209 default: 210 if (shellmode and index("!&*()[]", c) == nil) { 211 --curchar; 212 t = getident(); 213 } else { 214 t = c; 215 } 216 break; 217 } 218 } 219 # ifdef LEXDEBUG 220 if (lexdebug) { 221 fprintf(stderr, "yylex returns "); 222 print_token(stderr, t); 223 fprintf(stderr, "\n"); 224 } 225 # endif 226 return t; 227 } 228 229 /* 230 * Parser error handling. 231 */ 232 233 public yyerror(s) 234 String s; 235 { 236 register char *p; 237 register integer start; 238 239 if (streq(s, "syntax error")) { 240 beginerrmsg(); 241 p = prevchar; 242 start = p - &linebuf[0]; 243 if (p > &linebuf[0]) { 244 while (lexclass[*p] == WHITE and p > &linebuf[0]) { 245 --p; 246 } 247 } 248 fprintf(stderr, "%s", linebuf); 249 if (start != 0) { 250 fprintf(stderr, "%*c", start, ' '); 251 } 252 if (p == &linebuf[0]) { 253 fprintf(stderr, "^ unrecognized command"); 254 } else { 255 fprintf(stderr, "^ syntax error"); 256 } 257 enderrmsg(); 258 } else { 259 error(s); 260 } 261 } 262 263 /* 264 * Eat the current line. 265 */ 266 267 public gobble() 268 { 269 curchar = linebuf; 270 linebuf[0] = '\0'; 271 } 272 273 /* 274 * Scan an identifier and check to see if it's a keyword. 275 */ 276 277 private Token getident() 278 { 279 char buf[256]; 280 register Char *p, *q; 281 register Token t; 282 283 p = curchar; 284 q = buf; 285 if (shellmode) { 286 do { 287 *q++ = *p++; 288 } while (index(" \t\n!&<>*[]()'\"", *p) == nil); 289 } else { 290 do { 291 *q++ = *p++; 292 } while (isalnum(*p)); 293 } 294 curchar = p; 295 *q = '\0'; 296 yylval.y_name = identname(buf, false); 297 if (not shellmode) { 298 t = findkeyword(yylval.y_name); 299 if (t == nil) { 300 t = NAME; 301 } 302 } else { 303 t = NAME; 304 } 305 return t; 306 } 307 308 /* 309 * Scan a number. 310 */ 311 312 private Token getnum() 313 { 314 char buf[256]; 315 register Char *p, *q; 316 register Token t; 317 Integer base; 318 319 p = curchar; 320 q = buf; 321 if (*p == '0') { 322 if (*(p+1) == 'x') { 323 p += 2; 324 base = 16; 325 } else { 326 base = 8; 327 } 328 } else { 329 base = 10; 330 } 331 if (base == 16) { 332 do { 333 *q++ = *p++; 334 } while (ishexdigit(*p)); 335 } else { 336 do { 337 *q++ = *p++; 338 } while (isdigit(*p)); 339 } 340 if (*p == '.') { 341 do { 342 *q++ = *p++; 343 } while (isdigit(*p)); 344 if (*p == 'e' or *p == 'E') { 345 p++; 346 if (*p == '+' or *p == '-' or isdigit(*p)) { 347 *q++ = 'e'; 348 do { 349 *q++ = *p++; 350 } while (isdigit(*p)); 351 } 352 } 353 *q = '\0'; 354 yylval.y_real = atof(buf); 355 t = REAL; 356 } else { 357 *q = '\0'; 358 switch (base) { 359 case 10: 360 yylval.y_int = atol(buf); 361 break; 362 363 case 8: 364 yylval.y_int = octal(buf); 365 break; 366 367 case 16: 368 yylval.y_int = hex(buf); 369 break; 370 371 default: 372 badcaseval(base); 373 } 374 t = INT; 375 } 376 curchar = p; 377 return t; 378 } 379 380 /* 381 * Convert a string of octal digits to an integer. 382 */ 383 384 private int octal(s) 385 String s; 386 { 387 register Char *p; 388 register Integer n; 389 390 n = 0; 391 for (p = s; *p != '\0'; p++) { 392 n = 8*n + (*p - '0'); 393 } 394 return n; 395 } 396 397 /* 398 * Convert a string of hexadecimal digits to an integer. 399 */ 400 401 private int hex(s) 402 String s; 403 { 404 register Char *p; 405 register Integer n; 406 407 n = 0; 408 for (p = s; *p != '\0'; p++) { 409 n *= 16; 410 if (*p >= 'a' and *p <= 'f') { 411 n += (*p - 'a' + 10); 412 } else if (*p >= 'A' and *p <= 'F') { 413 n += (*p - 'A' + 10); 414 } else { 415 n += (*p - '0'); 416 } 417 } 418 return n; 419 } 420 421 /* 422 * Scan a string. 423 */ 424 425 private Token getstring() 426 { 427 char buf[256]; 428 register Char *p, *q; 429 Boolean endofstring; 430 431 p = curchar; 432 q = buf; 433 endofstring = false; 434 while (not endofstring) { 435 if (*p == '\n' or *p == '\0') { 436 error("non-terminated string"); 437 endofstring = true; 438 } else if (*p == '"' or *p == '\'') { 439 if (*(p+1) != *p) { 440 endofstring = true; 441 } else { 442 *q++ = *p; 443 } 444 } else { 445 curchar = p; 446 *q++ = charcon(p); 447 p = curchar; 448 } 449 p++; 450 } 451 curchar = p; 452 *q = '\0'; 453 yylval.y_string = strdup(buf); 454 return STRING; 455 } 456 457 /* 458 * Process a character constant. 459 * Watch out for backslashes. 460 */ 461 462 private Char charcon(p) 463 char *p; 464 { 465 char c, buf[10], *q; 466 467 if (*p == '\\') { 468 ++p; 469 if (*p != '\\') { 470 q = buf; 471 do { 472 *q++ = *p++; 473 } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0'); 474 *q = '\0'; 475 if (isdigit(buf[0])) { 476 c = (Char) octal(buf); 477 } else { 478 c = charlookup(buf); 479 } 480 curchar = p - 1; 481 } else { 482 c = '\\'; 483 } 484 } else { 485 c = *p; 486 } 487 return c; 488 } 489 490 /* 491 * Do a lookup for a ASCII character name. 492 */ 493 494 private String ascii[] = { 495 "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", 496 "BS", "HT", "NL", "VT", "NP", "CR", "SO", "SI", 497 "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", 498 "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US", 499 "SP", nil 500 }; 501 502 private char charlookup(s) 503 String s; 504 { 505 register int i; 506 507 for (i = 0; ascii[i] != NULL; i++) { 508 if (streq(s, ascii[i])) { 509 return i; 510 } 511 } 512 if (streq(s, "DEL")) { 513 return 0177; 514 } 515 error("unknown ascii name \"%s\"", s); 516 return '?'; 517 } 518 519 /* 520 * Input file management routines. 521 */ 522 523 public setinput(filename) 524 Filename filename; 525 { 526 File f; 527 528 f = fopen(filename, "r"); 529 if (f == nil) { 530 error("can't open %s", filename); 531 } else { 532 if (curinclindex >= MAXINCLDEPTH) { 533 error("unreasonable input nesting on \"%s\"", filename); 534 } 535 inclinfo[curinclindex].savefile = in; 536 inclinfo[curinclindex].savefn = errfilename; 537 inclinfo[curinclindex].savelineno = errlineno; 538 curinclindex++; 539 in = f; 540 errfilename = filename; 541 errlineno = 1; 542 } 543 } 544 545 private Boolean eofinput() 546 { 547 register Boolean b; 548 549 if (curinclindex == 0) { 550 if (isterm(in)) { 551 putchar('\n'); 552 clearerr(in); 553 b = false; 554 } else { 555 b = true; 556 } 557 } else { 558 fclose(in); 559 --curinclindex; 560 in = inclinfo[curinclindex].savefile; 561 errfilename = inclinfo[curinclindex].savefn; 562 errlineno = inclinfo[curinclindex].savelineno; 563 b = false; 564 } 565 return b; 566 } 567 568 /* 569 * Pop the current input. Return whether successful. 570 */ 571 572 public Boolean popinput() 573 { 574 Boolean b; 575 576 if (curinclindex == 0) { 577 b = false; 578 } else { 579 b = (Boolean) (not eofinput()); 580 } 581 return b; 582 } 583 584 /* 585 * Return whether we are currently reading from standard input. 586 */ 587 588 public Boolean isstdin() 589 { 590 return (Boolean) (in == stdin); 591 } 592 593 /* 594 * Send the current line to the shell. 595 */ 596 597 public shellline() 598 { 599 register char *p; 600 601 p = curchar; 602 while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) { 603 ++p; 604 } 605 shell(p); 606 if (*p == '\0' and isterm(in)) { 607 putchar('\n'); 608 } 609 erecover(); 610 } 611 612 /* 613 * Read the rest of the current line in "shell mode". 614 */ 615 616 public beginshellmode() 617 { 618 shellmode = true; 619 } 620 621 /* 622 * Print out a token for debugging. 623 */ 624 625 public print_token(f, t) 626 File f; 627 Token t; 628 { 629 if (t == '\n') { 630 fprintf(f, "char '\\n'"); 631 } else if (t == EOF) { 632 fprintf(f, "EOF"); 633 } else if (t < 256) { 634 fprintf(f, "char '%c'", t); 635 } else { 636 fprintf(f, "\"%s\"", keywdstring(t)); 637 } 638 } 639