xref: /csrg-svn/old/dbx/scanner.c (revision 11559)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static char sccsid[] = "@(#)scanner.c 1.4 03/13/83";
4 
5 /*
6  * Debugger scanner.
7  */
8 
9 #include "defs.h"
10 #include "scanner.h"
11 #include "main.h"
12 #include "keywords.h"
13 #include "tree.h"
14 #include "symbols.h"
15 #include "names.h"
16 #include "y.tab.h"
17 
18 #ifndef public
19 typedef int Token;
20 #endif
21 
22 public String initfile = ".dbxinit";
23 
24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25 
26 private Charclass class[256 + 1];
27 private Charclass *lexclass = class + 1;
28 
29 #define isdigit(c) (lexclass[c] == NUM)
30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31 #define ishexdigit(c) ( \
32     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33 )
34 
35 #define MAXLINESIZE 1024
36 
37 private File in;
38 private Char linebuf[MAXLINESIZE];
39 private Char *curchar;
40 
41 #define MAXINCLDEPTH 10
42 
43 private struct {
44     File savefile;
45     Filename savefn;
46     int savelineno;
47 } inclinfo[MAXINCLDEPTH];
48 
49 private unsigned int curinclindex;
50 
51 private Boolean firsttoken = true;
52 private Boolean firstinit = true;
53 
54 private Token getident();
55 private Token getnum();
56 private Token getstring();
57 private Boolean eofinput();
58 private Char charcon();
59 private Char charlookup();
60 
61 private enterlexclass(class, s)
62 Charclass class;
63 String s;
64 {
65     register char *p;
66 
67     for (p = s; *p != '\0'; p++) {
68 	lexclass[*p] = class;
69     }
70 }
71 
72 public scanner_init()
73 {
74     register Integer i;
75 
76     for (i = 0; i < 257; i++) {
77 	class[i] = OTHER;
78     }
79     enterlexclass(WHITE, " \t");
80     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
81     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
82     enterlexclass(NUM, "0123456789");
83     in = stdin;
84     errfilename = nil;
85     errlineno = 0;
86     curchar = linebuf;
87     linebuf[0] = '\0';
88     if (runfirst) {
89 	firstinit = false;
90 	firsttoken = false;
91     } else if (firstinit and isterm(in)) {
92 	firstinit = false;
93 	printf("> ");
94 	fflush(stdout);
95     }
96 }
97 
98 /*
99  * Read a single token.
100  *
101  * Input is line buffered.
102  *
103  * There are two "modes" of operation:  one as in a compiler,
104  * and one for reading shell-like syntax.
105  */
106 
107 private Boolean shellmode;
108 
109 public Token yylex()
110 {
111     register int c;
112     register char *p;
113     register Token t;
114     String line;
115 
116     p = curchar;
117     if (*p == '\0') {
118 	do {
119 	    if (isterm(in)) {
120 		if (firsttoken) {
121 		    firsttoken = false;
122 		} else {
123 		    printf("> ");
124 		    fflush(stdout);
125 		}
126 	    }
127 	    line = fgets(linebuf, MAXLINESIZE, in);
128 	} while (line == nil and not eofinput());
129 	if (line == nil) {
130 	    c = EOF;
131 	} else {
132 	    p = linebuf;
133 	    while (lexclass[*p] == WHITE) {
134 		p++;
135 	    }
136 	    shellmode = false;
137 	}
138     } else {
139 	while (lexclass[*p] == WHITE) {
140 	    p++;
141 	}
142     }
143     curchar = p;
144     c = *p;
145     if (lexclass[c] == ALPHA) {
146 	t = getident();
147     } else if (lexclass[c] == NUM) {
148 	t = getnum();
149     } else {
150 	++curchar;
151 	switch (c) {
152 	    case '\n':
153 		t = '\n';
154 		if (errlineno != 0) {
155 		    errlineno++;
156 		}
157 		break;
158 
159 	    case '"':
160 	    case '\'':
161 		t = getstring();
162 		break;
163 
164 	    case '.':
165 		if (shellmode) {
166 		    --curchar;
167 		    t = getident();
168 		} else if (isdigit(*curchar)) {
169 		    --curchar;
170 		    t = getnum();
171 		} else {
172 		    t = '.';
173 		}
174 		break;
175 
176 	    case '<':
177 		if (not shellmode and *curchar == '<') {
178 		    ++curchar;
179 		    t = LFORMER;
180 		} else {
181 		    t = '<';
182 		}
183 		break;
184 
185 	    case '>':
186 		if (not shellmode and *curchar == '>') {
187 		    ++curchar;
188 		    t = RFORMER;
189 		} else {
190 		    t = '>';
191 		}
192 		break;
193 
194 	    case '#':
195 		if (*curchar == '^') {
196 		    ++curchar;
197 		    t = ABSTRACTION;
198 		} else {
199 		    t = '#';
200 		}
201 		break;
202 
203 	    case '-':
204 		if (shellmode) {
205 		    --curchar;
206 		    t = getident();
207 		} else if (*curchar == '>') {
208 		    ++curchar;
209 		    t = ARROW;
210 		} else {
211 		    t = '-';
212 		}
213 		break;
214 
215 	    case EOF:
216 		t = 0;
217 		break;
218 
219 	    default:
220 		if (shellmode and index("!&*()[]", c) == nil) {
221 		    --curchar;
222 		    t = getident();
223 		} else {
224 		    t = c;
225 		}
226 		break;
227 	}
228     }
229 #   ifdef LEXDEBUG
230 	if (lexdebug) {
231 	    fprintf(stderr, "yylex returns ");
232 	    print_token(stderr, t);
233 	    fprintf(stderr, "\n");
234 	}
235 #   endif
236     return t;
237 }
238 
239 /*
240  * Parser error handling.
241  */
242 
243 public yyerror(s)
244 String s;
245 {
246     register Char *p, *tokenbegin, *tokenend;
247     register Integer len;
248 
249     if (streq(s, "syntax error")) {
250 	beginerrmsg();
251 	tokenend = curchar - 1;
252 	tokenbegin = tokenend;
253 	while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
254 	    --tokenbegin;
255 	}
256 	len = tokenend - tokenbegin + 1;
257 	p = tokenbegin;
258 	if (p > &linebuf[0]) {
259 	    while (lexclass[*p] == WHITE and p > &linebuf[0]) {
260 		--p;
261 	    }
262 	}
263 	if (p == &linebuf[0]) {
264 	    fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
265 	} else {
266 	    fprintf(stderr, "syntax error");
267 	    if (len != 0) {
268 		fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
269 	    }
270 	}
271 	enderrmsg();
272     } else {
273 	error(s);
274     }
275 }
276 
277 /*
278  * Eat the current line.
279  */
280 
281 public gobble()
282 {
283     curchar = linebuf;
284     linebuf[0] = '\0';
285 }
286 
287 /*
288  * Scan an identifier and check to see if it's a keyword.
289  */
290 
291 private Token getident()
292 {
293     char buf[256];
294     register Char *p, *q;
295     register Token t;
296 
297     p = curchar;
298     q = buf;
299     if (shellmode) {
300 	do {
301 	    *q++ = *p++;
302 	} while (index(" \t\n!&<>*[]()", *p) == nil);
303     } else {
304 	do {
305 	    *q++ = *p++;
306 	} while (isalnum(*p));
307     }
308     curchar = p;
309     *q = '\0';
310     yylval.y_name = identname(buf, false);
311     if (not shellmode) {
312 	t = findkeyword(yylval.y_name);
313 	if (t == nil) {
314 	    t = NAME;
315 	}
316     } else {
317 	t = NAME;
318     }
319     return t;
320 }
321 
322 /*
323  * Scan a number.
324  */
325 
326 private Token getnum()
327 {
328     char buf[256];
329     register Char *p, *q;
330     register Token t;
331     Integer base;
332 
333     p = curchar;
334     q = buf;
335     if (*p == '0') {
336 	if (*(p+1) == 'x') {
337 	    p += 2;
338 	    base = 16;
339 	} else {
340 	    base = 8;
341 	}
342     } else {
343 	base = 10;
344     }
345     if (base == 16) {
346 	do {
347 	    *q++ = *p++;
348 	} while (ishexdigit(*p));
349     } else {
350 	do {
351 	    *q++ = *p++;
352 	} while (isdigit(*p));
353     }
354     if (*p == '.') {
355 	do {
356 	    *q++ = *p++;
357 	} while (isdigit(*p));
358 	if (*p == 'e' or *p == 'E') {
359 	    p++;
360 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
361 		*q++ = 'e';
362 		do {
363 		    *q++ = *p++;
364 		} while (isdigit(*p));
365 	    }
366 	}
367 	*q = '\0';
368 	yylval.y_real = atof(buf);
369 	t = REAL;
370     } else {
371 	*q = '\0';
372 	switch (base) {
373 	    case 10:
374 		yylval.y_int = atol(buf);
375 		break;
376 
377 	    case 8:
378 		yylval.y_int = octal(buf);
379 		break;
380 
381 	    case 16:
382 		yylval.y_int = hex(buf);
383 		break;
384 
385 	    default:
386 		badcaseval(base);
387 	}
388 	t = INT;
389     }
390     curchar = p;
391     return t;
392 }
393 
394 /*
395  * Convert a string of octal digits to an integer.
396  */
397 
398 private int octal(s)
399 String s;
400 {
401     register Char *p;
402     register Integer n;
403 
404     n = 0;
405     for (p = s; *p != '\0'; p++) {
406 	n = 8*n + (*p - '0');
407     }
408     return n;
409 }
410 
411 /*
412  * Convert a string of hexadecimal digits to an integer.
413  */
414 
415 private int hex(s)
416 String s;
417 {
418     register Char *p;
419     register Integer n;
420 
421     n = 0;
422     for (p = s; *p != '\0'; p++) {
423 	n *= 16;
424 	if (*p >= 'a' and *p <= 'f') {
425 	    n += (*p - 'a' + 10);
426 	} else if (*p >= 'A' and *p <= 'F') {
427 	    n += (*p - 'A' + 10);
428 	} else {
429 	    n += (*p - '0');
430 	}
431     }
432     return n;
433 }
434 
435 /*
436  * Scan a string.
437  */
438 
439 private Token getstring()
440 {
441     char buf[256];
442     register Char *p, *q;
443     Boolean endofstring;
444 
445     p = curchar;
446     q = buf;
447     endofstring = false;
448     while (not endofstring) {
449 	if (*p == '\n' or *p == '\0') {
450 	    error("non-terminated string");
451 	    endofstring = true;
452 	} else if (*p == '"' or *p == '\'') {
453 	    if (*(p+1) != *p) {
454 		endofstring = true;
455 	    } else {
456 		*q++ = *p;
457 	    }
458 	} else {
459 	    *q++ = charcon(p);
460 	    p = curchar;
461 	}
462 	p++;
463     }
464     curchar = p;
465     *q = '\0';
466     yylval.y_string = strdup(buf);
467     return STRING;
468 }
469 
470 /*
471  * Process a character constant.
472  * Watch out for backslashes.
473  */
474 
475 private Char charcon(p)
476 char *p;
477 {
478     char c, buf[10], *q;
479 
480     if (*p == '\\') {
481 	++p;
482 	if (*p != '\\') {
483 	    q = buf;
484 	    do {
485 		*q++ = *p++;
486 	    } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
487 	    *q = '\0';
488 	    if (isdigit(buf[0])) {
489 		c = (Char) octal(buf);
490 	    } else {
491 		c = charlookup(buf);
492 	    }
493 	    curchar = p - 1;
494 	} else {
495 	    c = '\\';
496 	}
497     } else {
498 	c = *p;
499     }
500     return c;
501 }
502 
503 /*
504  * Do a lookup for a ASCII character name.
505  */
506 
507 private String ascii[] = {
508     "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
509     "BS",  "HT",  "NL",  "VT",  "NP",  "CR",  "SO",  "SI",
510     "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
511     "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
512     "SP", nil
513 };
514 
515 private char charlookup(s)
516 String s;
517 {
518     register int i;
519 
520     for (i = 0; ascii[i] != NULL; i++) {
521 	if (streq(s, ascii[i])) {
522 	    return i;
523 	}
524     }
525     if (streq(s, "DEL")) {
526 	return 0177;
527     }
528     error("unknown ascii name \"%s\"", s);
529     return '?';
530 }
531 
532 /*
533  * Input file management routines.
534  */
535 
536 public setinput(filename)
537 Filename filename;
538 {
539     File f;
540 
541     f = fopen(filename, "r");
542     if (f == nil) {
543 	error("can't open %s", filename);
544     } else {
545 	if (curinclindex >= MAXINCLDEPTH) {
546 	    error("unreasonable input nesting on \"%s\"", filename);
547 	}
548 	inclinfo[curinclindex].savefile = in;
549 	inclinfo[curinclindex].savefn = errfilename;
550 	inclinfo[curinclindex].savelineno = errlineno;
551 	curinclindex++;
552 	in = f;
553 	errfilename = filename;
554 	errlineno = 1;
555     }
556 }
557 
558 private Boolean eofinput()
559 {
560     register Boolean b;
561 
562     if (curinclindex == 0) {
563 	if (isterm(in)) {
564 	    putchar('\n');
565 	    b = false;
566 	} else {
567 	    b = true;
568 	}
569     } else {
570 	fclose(in);
571 	--curinclindex;
572 	in = inclinfo[curinclindex].savefile;
573 	errfilename = inclinfo[curinclindex].savefn;
574 	errlineno = inclinfo[curinclindex].savelineno;
575 	b = false;
576     }
577     return b;
578 }
579 
580 /*
581  * Pop the current input.  Return whether successful.
582  */
583 
584 public Boolean popinput()
585 {
586     Boolean b;
587 
588     if (curinclindex == 0) {
589 	b = false;
590     } else {
591 	b = (Boolean) (not eofinput());
592     }
593     return b;
594 }
595 
596 /*
597  * Return whether we are currently reading from standard input.
598  */
599 
600 public Boolean isstdin()
601 {
602     return (Boolean) (in == stdin);
603 }
604 
605 /*
606  * Send the current line to the shell.
607  */
608 
609 public shellline()
610 {
611     register char *p;
612 
613     p = curchar;
614     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
615 	++p;
616     }
617     shell(p);
618     if (*p == '\0' and isterm(in)) {
619 	putchar('\n');
620     }
621     erecover();
622 }
623 
624 /*
625  * Read the rest of the current line in "shell mode".
626  */
627 
628 public beginshellmode()
629 {
630     shellmode = true;
631 }
632 
633 /*
634  * Print out a token for debugging.
635  */
636 
637 public print_token(f, t)
638 File f;
639 Token t;
640 {
641     if (t == '\n') {
642 	fprintf(f, "char '\\n'");
643     } else if (t == EOF) {
644 	fprintf(f, "EOF");
645     } else if (t < 256) {
646 	fprintf(f, "char '%c'", t);
647     } else {
648 	fprintf(f, "\"%s\"", keywdstring(t));
649     }
650 }
651