xref: /csrg-svn/old/dbx/scanner.c (revision 12120)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static char sccsid[] = "@(#)scanner.c 1.6 04/29/83";
4 
5 /*
6  * Debugger scanner.
7  */
8 
9 #include "defs.h"
10 #include "scanner.h"
11 #include "main.h"
12 #include "keywords.h"
13 #include "tree.h"
14 #include "symbols.h"
15 #include "names.h"
16 #include "y.tab.h"
17 
18 #ifndef public
19 typedef int Token;
20 #endif
21 
22 public String initfile = ".dbxinit";
23 
24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25 
26 private Charclass class[256 + 1];
27 private Charclass *lexclass = class + 1;
28 
/*
 * Classification shorthands built on the lexclass table.
 *
 * Every use of the argument is parenthesized so that an expression
 * argument cannot regroup with the surrounding operators.  The argument
 * is still evaluated more than once, so it must be free of side effects.
 */
#define isdigit(c) (lexclass[(c)] == NUM)
#define isalnum(c) (lexclass[(c)] == ALPHA or lexclass[(c)] == NUM)
#define ishexdigit(c) ( \
    isdigit(c) or ((c) >= 'a' and (c) <= 'f') or ((c) >= 'A' and (c) <= 'F') \
)
34 
35 #define MAXLINESIZE 1024
36 
37 private File in;
38 private Char linebuf[MAXLINESIZE];
39 private Char *curchar;
40 
41 #define MAXINCLDEPTH 10
42 
43 private struct {
44     File savefile;
45     Filename savefn;
46     int savelineno;
47 } inclinfo[MAXINCLDEPTH];
48 
49 private unsigned int curinclindex;
50 
51 private Token getident();
52 private Token getnum();
53 private Token getstring();
54 private Boolean eofinput();
55 private Char charcon();
56 private Char charlookup();
57 
58 private enterlexclass(class, s)
59 Charclass class;
60 String s;
61 {
62     register char *p;
63 
64     for (p = s; *p != '\0'; p++) {
65 	lexclass[*p] = class;
66     }
67 }
68 
69 public scanner_init()
70 {
71     register Integer i;
72 
73     for (i = 0; i < 257; i++) {
74 	class[i] = OTHER;
75     }
76     enterlexclass(WHITE, " \t");
77     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
78     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
79     enterlexclass(NUM, "0123456789");
80     in = stdin;
81     errfilename = nil;
82     errlineno = 0;
83     curchar = linebuf;
84     linebuf[0] = '\0';
85 }
86 
87 /*
88  * Read a single token.
89  *
90  * Input is line buffered.
91  *
92  * There are two "modes" of operation:  one as in a compiler,
93  * and one for reading shell-like syntax.
94  */
95 
96 private Boolean shellmode;
97 
98 public Token yylex()
99 {
100     register int c;
101     register char *p;
102     register Token t;
103     String line;
104 
105     p = curchar;
106     if (*p == '\0') {
107 	do {
108 	    if (isterm(in)) {
109 		printf("> ");
110 		fflush(stdout);
111 	    }
112 	    line = fgets(linebuf, MAXLINESIZE, in);
113 	} while (line == nil and not eofinput());
114 	if (line == nil) {
115 	    c = EOF;
116 	} else {
117 	    p = linebuf;
118 	    while (lexclass[*p] == WHITE) {
119 		p++;
120 	    }
121 	    shellmode = false;
122 	}
123     } else {
124 	while (lexclass[*p] == WHITE) {
125 	    p++;
126 	}
127     }
128     curchar = p;
129     c = *p;
130     if (lexclass[c] == ALPHA) {
131 	t = getident();
132     } else if (lexclass[c] == NUM) {
133 	if (shellmode) {
134 	    t = getident();
135 	} else {
136 	    t = getnum();
137 	}
138     } else {
139 	++curchar;
140 	switch (c) {
141 	    case '\n':
142 		t = '\n';
143 		if (errlineno != 0) {
144 		    errlineno++;
145 		}
146 		break;
147 
148 	    case '"':
149 	    case '\'':
150 		t = getstring();
151 		break;
152 
153 	    case '.':
154 		if (shellmode) {
155 		    --curchar;
156 		    t = getident();
157 		} else if (isdigit(*curchar)) {
158 		    --curchar;
159 		    t = getnum();
160 		} else {
161 		    t = '.';
162 		}
163 		break;
164 
165 	    case '<':
166 		if (not shellmode and *curchar == '<') {
167 		    ++curchar;
168 		    t = LFORMER;
169 		} else {
170 		    t = '<';
171 		}
172 		break;
173 
174 	    case '>':
175 		if (not shellmode and *curchar == '>') {
176 		    ++curchar;
177 		    t = RFORMER;
178 		} else {
179 		    t = '>';
180 		}
181 		break;
182 
183 	    case '#':
184 		if (*curchar == '^') {
185 		    ++curchar;
186 		    t = ABSTRACTION;
187 		} else {
188 		    t = '#';
189 		}
190 		break;
191 
192 	    case '-':
193 		if (shellmode) {
194 		    --curchar;
195 		    t = getident();
196 		} else if (*curchar == '>') {
197 		    ++curchar;
198 		    t = ARROW;
199 		} else {
200 		    t = '-';
201 		}
202 		break;
203 
204 	    case EOF:
205 		t = 0;
206 		break;
207 
208 	    default:
209 		if (shellmode and index("!&*()[]", c) == nil) {
210 		    --curchar;
211 		    t = getident();
212 		} else {
213 		    t = c;
214 		}
215 		break;
216 	}
217     }
218 #   ifdef LEXDEBUG
219 	if (lexdebug) {
220 	    fprintf(stderr, "yylex returns ");
221 	    print_token(stderr, t);
222 	    fprintf(stderr, "\n");
223 	}
224 #   endif
225     return t;
226 }
227 
228 /*
229  * Parser error handling.
230  */
231 
232 public yyerror(s)
233 String s;
234 {
235     register Char *p, *tokenbegin, *tokenend;
236     register Integer len;
237 
238     if (streq(s, "syntax error")) {
239 	beginerrmsg();
240 	tokenend = curchar - 1;
241 	tokenbegin = tokenend;
242 	while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
243 	    --tokenbegin;
244 	}
245 	len = tokenend - tokenbegin + 1;
246 	p = tokenbegin;
247 	if (p > &linebuf[0]) {
248 	    while (lexclass[*p] == WHITE and p > &linebuf[0]) {
249 		--p;
250 	    }
251 	}
252 	if (p == &linebuf[0]) {
253 	    fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
254 	} else {
255 	    fprintf(stderr, "syntax error");
256 	    if (len != 0) {
257 		fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
258 	    }
259 	}
260 	enderrmsg();
261     } else {
262 	error(s);
263     }
264 }
265 
266 /*
267  * Eat the current line.
268  */
269 
270 public gobble()
271 {
272     curchar = linebuf;
273     linebuf[0] = '\0';
274 }
275 
276 /*
277  * Scan an identifier and check to see if it's a keyword.
278  */
279 
280 private Token getident()
281 {
282     char buf[256];
283     register Char *p, *q;
284     register Token t;
285 
286     p = curchar;
287     q = buf;
288     if (shellmode) {
289 	do {
290 	    *q++ = *p++;
291 	} while (index(" \t\n!&<>*[]()", *p) == nil);
292     } else {
293 	do {
294 	    *q++ = *p++;
295 	} while (isalnum(*p));
296     }
297     curchar = p;
298     *q = '\0';
299     yylval.y_name = identname(buf, false);
300     if (not shellmode) {
301 	t = findkeyword(yylval.y_name);
302 	if (t == nil) {
303 	    t = NAME;
304 	}
305     } else {
306 	t = NAME;
307     }
308     return t;
309 }
310 
311 /*
312  * Scan a number.
313  */
314 
315 private Token getnum()
316 {
317     char buf[256];
318     register Char *p, *q;
319     register Token t;
320     Integer base;
321 
322     p = curchar;
323     q = buf;
324     if (*p == '0') {
325 	if (*(p+1) == 'x') {
326 	    p += 2;
327 	    base = 16;
328 	} else {
329 	    base = 8;
330 	}
331     } else {
332 	base = 10;
333     }
334     if (base == 16) {
335 	do {
336 	    *q++ = *p++;
337 	} while (ishexdigit(*p));
338     } else {
339 	do {
340 	    *q++ = *p++;
341 	} while (isdigit(*p));
342     }
343     if (*p == '.') {
344 	do {
345 	    *q++ = *p++;
346 	} while (isdigit(*p));
347 	if (*p == 'e' or *p == 'E') {
348 	    p++;
349 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
350 		*q++ = 'e';
351 		do {
352 		    *q++ = *p++;
353 		} while (isdigit(*p));
354 	    }
355 	}
356 	*q = '\0';
357 	yylval.y_real = atof(buf);
358 	t = REAL;
359     } else {
360 	*q = '\0';
361 	switch (base) {
362 	    case 10:
363 		yylval.y_int = atol(buf);
364 		break;
365 
366 	    case 8:
367 		yylval.y_int = octal(buf);
368 		break;
369 
370 	    case 16:
371 		yylval.y_int = hex(buf);
372 		break;
373 
374 	    default:
375 		badcaseval(base);
376 	}
377 	t = INT;
378     }
379     curchar = p;
380     return t;
381 }
382 
383 /*
384  * Convert a string of octal digits to an integer.
385  */
386 
387 private int octal(s)
388 String s;
389 {
390     register Char *p;
391     register Integer n;
392 
393     n = 0;
394     for (p = s; *p != '\0'; p++) {
395 	n = 8*n + (*p - '0');
396     }
397     return n;
398 }
399 
400 /*
401  * Convert a string of hexadecimal digits to an integer.
402  */
403 
404 private int hex(s)
405 String s;
406 {
407     register Char *p;
408     register Integer n;
409 
410     n = 0;
411     for (p = s; *p != '\0'; p++) {
412 	n *= 16;
413 	if (*p >= 'a' and *p <= 'f') {
414 	    n += (*p - 'a' + 10);
415 	} else if (*p >= 'A' and *p <= 'F') {
416 	    n += (*p - 'A' + 10);
417 	} else {
418 	    n += (*p - '0');
419 	}
420     }
421     return n;
422 }
423 
424 /*
425  * Scan a string.
426  */
427 
428 private Token getstring()
429 {
430     char buf[256];
431     register Char *p, *q;
432     Boolean endofstring;
433 
434     p = curchar;
435     q = buf;
436     endofstring = false;
437     while (not endofstring) {
438 	if (*p == '\n' or *p == '\0') {
439 	    error("non-terminated string");
440 	    endofstring = true;
441 	} else if (*p == '"' or *p == '\'') {
442 	    if (*(p+1) != *p) {
443 		endofstring = true;
444 	    } else {
445 		*q++ = *p;
446 	    }
447 	} else {
448 	    *q++ = charcon(p);
449 	    p = curchar;
450 	}
451 	p++;
452     }
453     curchar = p;
454     *q = '\0';
455     yylval.y_string = strdup(buf);
456     return STRING;
457 }
458 
459 /*
460  * Process a character constant.
461  * Watch out for backslashes.
462  */
463 
464 private Char charcon(p)
465 char *p;
466 {
467     char c, buf[10], *q;
468 
469     if (*p == '\\') {
470 	++p;
471 	if (*p != '\\') {
472 	    q = buf;
473 	    do {
474 		*q++ = *p++;
475 	    } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
476 	    *q = '\0';
477 	    if (isdigit(buf[0])) {
478 		c = (Char) octal(buf);
479 	    } else {
480 		c = charlookup(buf);
481 	    }
482 	    curchar = p - 1;
483 	} else {
484 	    c = '\\';
485 	}
486     } else {
487 	c = *p;
488     }
489     return c;
490 }
491 
492 /*
493  * Do a lookup for a ASCII character name.
494  */
495 
496 private String ascii[] = {
497     "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
498     "BS",  "HT",  "NL",  "VT",  "NP",  "CR",  "SO",  "SI",
499     "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
500     "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
501     "SP", nil
502 };
503 
504 private char charlookup(s)
505 String s;
506 {
507     register int i;
508 
509     for (i = 0; ascii[i] != NULL; i++) {
510 	if (streq(s, ascii[i])) {
511 	    return i;
512 	}
513     }
514     if (streq(s, "DEL")) {
515 	return 0177;
516     }
517     error("unknown ascii name \"%s\"", s);
518     return '?';
519 }
520 
521 /*
522  * Input file management routines.
523  */
524 
525 public setinput(filename)
526 Filename filename;
527 {
528     File f;
529 
530     f = fopen(filename, "r");
531     if (f == nil) {
532 	error("can't open %s", filename);
533     } else {
534 	if (curinclindex >= MAXINCLDEPTH) {
535 	    error("unreasonable input nesting on \"%s\"", filename);
536 	}
537 	inclinfo[curinclindex].savefile = in;
538 	inclinfo[curinclindex].savefn = errfilename;
539 	inclinfo[curinclindex].savelineno = errlineno;
540 	curinclindex++;
541 	in = f;
542 	errfilename = filename;
543 	errlineno = 1;
544     }
545 }
546 
547 private Boolean eofinput()
548 {
549     register Boolean b;
550 
551     if (curinclindex == 0) {
552 	if (isterm(in)) {
553 	    putchar('\n');
554 	    b = false;
555 	} else {
556 	    b = true;
557 	}
558     } else {
559 	fclose(in);
560 	--curinclindex;
561 	in = inclinfo[curinclindex].savefile;
562 	errfilename = inclinfo[curinclindex].savefn;
563 	errlineno = inclinfo[curinclindex].savelineno;
564 	b = false;
565     }
566     return b;
567 }
568 
569 /*
570  * Pop the current input.  Return whether successful.
571  */
572 
573 public Boolean popinput()
574 {
575     Boolean b;
576 
577     if (curinclindex == 0) {
578 	b = false;
579     } else {
580 	b = (Boolean) (not eofinput());
581     }
582     return b;
583 }
584 
585 /*
586  * Return whether we are currently reading from standard input.
587  */
588 
589 public Boolean isstdin()
590 {
591     return (Boolean) (in == stdin);
592 }
593 
594 /*
595  * Send the current line to the shell.
596  */
597 
598 public shellline()
599 {
600     register char *p;
601 
602     p = curchar;
603     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
604 	++p;
605     }
606     shell(p);
607     if (*p == '\0' and isterm(in)) {
608 	putchar('\n');
609     }
610     erecover();
611 }
612 
613 /*
614  * Read the rest of the current line in "shell mode".
615  */
616 
617 public beginshellmode()
618 {
619     shellmode = true;
620 }
621 
622 /*
623  * Print out a token for debugging.
624  */
625 
626 public print_token(f, t)
627 File f;
628 Token t;
629 {
630     if (t == '\n') {
631 	fprintf(f, "char '\\n'");
632     } else if (t == EOF) {
633 	fprintf(f, "EOF");
634     } else if (t < 256) {
635 	fprintf(f, "char '%c'", t);
636     } else {
637 	fprintf(f, "\"%s\"", keywdstring(t));
638     }
639 }
640