xref: /csrg-svn/old/dbx/scanner.c (revision 16619)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static char sccsid[] = "@(#)scanner.c 1.8 8/5/83";
4 
5 static char rcsid[] = "$Header: scanner.c,v 1.3 84/03/27 10:23:50 linton Exp $";
6 
7 /*
8  * Debugger scanner.
9  */
10 
11 #include "defs.h"
12 #include "scanner.h"
13 #include "main.h"
14 #include "keywords.h"
15 #include "tree.h"
16 #include "symbols.h"
17 #include "names.h"
18 #include "y.tab.h"
19 
20 #ifndef public
21 typedef int Token;
22 #endif
23 
24 public String initfile = ".dbxinit";
25 
26 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
27 
28 private Charclass class[256 + 1];
29 private Charclass *lexclass = class + 1;
30 
31 #define isdigit(c) (lexclass[c] == NUM)
32 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
33 #define ishexdigit(c) ( \
34     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
35 )
36 
37 #define MAXLINESIZE 1024
38 
39 private File in;
40 private Char linebuf[MAXLINESIZE];
41 private Char *curchar, *prevchar;
42 
43 #define MAXINCLDEPTH 10
44 
45 private struct {
46     File savefile;
47     Filename savefn;
48     int savelineno;
49 } inclinfo[MAXINCLDEPTH];
50 
51 private unsigned int curinclindex;
52 
53 private Token getident();
54 private Token getnum();
55 private Token getstring();
56 private Boolean eofinput();
57 private Char charcon();
58 private Char charlookup();
59 
60 private enterlexclass(class, s)
61 Charclass class;
62 String s;
63 {
64     register char *p;
65 
66     for (p = s; *p != '\0'; p++) {
67 	lexclass[*p] = class;
68     }
69 }
70 
71 public scanner_init()
72 {
73     register Integer i;
74 
75     for (i = 0; i < 257; i++) {
76 	class[i] = OTHER;
77     }
78     enterlexclass(WHITE, " \t");
79     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
80     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
81     enterlexclass(NUM, "0123456789");
82     in = stdin;
83     errfilename = nil;
84     errlineno = 0;
85     curchar = linebuf;
86     linebuf[0] = '\0';
87 }
88 
89 /*
90  * Read a single token.
91  *
92  * Input is line buffered.
93  *
94  * There are two "modes" of operation:  one as in a compiler,
95  * and one for reading shell-like syntax.
96  */
97 
98 private Boolean shellmode;
99 
100 public Token yylex()
101 {
102     register int c;
103     register char *p;
104     register Token t;
105     String line;
106 
107     p = curchar;
108     if (*p == '\0') {
109 	do {
110 	    if (isterm(in)) {
111 		printf("(%s) ", cmdname);
112 		fflush(stdout);
113 	    }
114 	    line = fgets(linebuf, MAXLINESIZE, in);
115 	} while (line == nil and not eofinput());
116 	if (line == nil) {
117 	    c = EOF;
118 	} else {
119 	    p = linebuf;
120 	    while (lexclass[*p] == WHITE) {
121 		p++;
122 	    }
123 	    shellmode = false;
124 	}
125     } else {
126 	while (lexclass[*p] == WHITE) {
127 	    p++;
128 	}
129     }
130     curchar = p;
131     prevchar = curchar;
132     c = *p;
133     if (lexclass[c] == ALPHA) {
134 	t = getident();
135     } else if (lexclass[c] == NUM) {
136 	if (shellmode) {
137 	    t = getident();
138 	} else {
139 	    t = getnum();
140 	}
141     } else {
142 	++curchar;
143 	switch (c) {
144 	    case '\n':
145 		t = '\n';
146 		if (errlineno != 0) {
147 		    errlineno++;
148 		}
149 		break;
150 
151 	    case '"':
152 	    case '\'':
153 		t = getstring();
154 		break;
155 
156 	    case '.':
157 		if (shellmode) {
158 		    --curchar;
159 		    t = getident();
160 		} else if (isdigit(*curchar)) {
161 		    --curchar;
162 		    t = getnum();
163 		} else {
164 		    t = '.';
165 		}
166 		break;
167 
168 	    case '<':
169 		if (not shellmode and *curchar == '<') {
170 		    ++curchar;
171 		    t = LFORMER;
172 		} else {
173 		    t = '<';
174 		}
175 		break;
176 
177 	    case '>':
178 		if (not shellmode and *curchar == '>') {
179 		    ++curchar;
180 		    t = RFORMER;
181 		} else {
182 		    t = '>';
183 		}
184 		break;
185 
186 	    case '#':
187 		if (*curchar == '^') {
188 		    ++curchar;
189 		    t = ABSTRACTION;
190 		} else {
191 		    t = '#';
192 		}
193 		break;
194 
195 	    case '-':
196 		if (shellmode) {
197 		    --curchar;
198 		    t = getident();
199 		} else if (*curchar == '>') {
200 		    ++curchar;
201 		    t = ARROW;
202 		} else {
203 		    t = '-';
204 		}
205 		break;
206 
207 	    case EOF:
208 		t = 0;
209 		break;
210 
211 	    default:
212 		if (shellmode and index("!&*()[]", c) == nil) {
213 		    --curchar;
214 		    t = getident();
215 		} else {
216 		    t = c;
217 		}
218 		break;
219 	}
220     }
221 #   ifdef LEXDEBUG
222 	if (lexdebug) {
223 	    fprintf(stderr, "yylex returns ");
224 	    print_token(stderr, t);
225 	    fprintf(stderr, "\n");
226 	}
227 #   endif
228     return t;
229 }
230 
231 /*
232  * Parser error handling.
233  */
234 
235 public yyerror(s)
236 String s;
237 {
238     register char *p;
239     register integer start;
240 
241     if (streq(s, "syntax error")) {
242 	beginerrmsg();
243 	p = prevchar;
244 	start = p - &linebuf[0];
245 	if (p > &linebuf[0]) {
246 	    while (lexclass[*p] == WHITE and p > &linebuf[0]) {
247 		--p;
248 	    }
249 	}
250 	fprintf(stderr, "%s", linebuf);
251 	if (start != 0) {
252 	    fprintf(stderr, "%*c", start, ' ');
253 	}
254 	if (p == &linebuf[0]) {
255 	    fprintf(stderr, "^ unrecognized command");
256 	} else {
257 	    fprintf(stderr, "^ syntax error");
258 	}
259 	enderrmsg();
260     } else {
261 	error(s);
262     }
263 }
264 
265 /*
266  * Eat the current line.
267  */
268 
269 public gobble()
270 {
271     curchar = linebuf;
272     linebuf[0] = '\0';
273 }
274 
275 /*
276  * Scan an identifier and check to see if it's a keyword.
277  */
278 
279 private Token getident()
280 {
281     char buf[256];
282     register Char *p, *q;
283     register Token t;
284 
285     p = curchar;
286     q = buf;
287     if (shellmode) {
288 	do {
289 	    *q++ = *p++;
290 	} while (index(" \t\n!&<>*[]()'\"", *p) == nil);
291     } else {
292 	do {
293 	    *q++ = *p++;
294 	} while (isalnum(*p));
295     }
296     curchar = p;
297     *q = '\0';
298     yylval.y_name = identname(buf, false);
299     if (not shellmode) {
300 	t = findkeyword(yylval.y_name);
301 	if (t == nil) {
302 	    t = NAME;
303 	}
304     } else {
305 	t = NAME;
306     }
307     return t;
308 }
309 
310 /*
311  * Scan a number.
312  */
313 
314 private Token getnum()
315 {
316     char buf[256];
317     register Char *p, *q;
318     register Token t;
319     Integer base;
320 
321     p = curchar;
322     q = buf;
323     if (*p == '0') {
324 	if (*(p+1) == 'x') {
325 	    p += 2;
326 	    base = 16;
327 	} else {
328 	    base = 8;
329 	}
330     } else {
331 	base = 10;
332     }
333     if (base == 16) {
334 	do {
335 	    *q++ = *p++;
336 	} while (ishexdigit(*p));
337     } else {
338 	do {
339 	    *q++ = *p++;
340 	} while (isdigit(*p));
341     }
342     if (*p == '.') {
343 	do {
344 	    *q++ = *p++;
345 	} while (isdigit(*p));
346 	if (*p == 'e' or *p == 'E') {
347 	    p++;
348 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
349 		*q++ = 'e';
350 		do {
351 		    *q++ = *p++;
352 		} while (isdigit(*p));
353 	    }
354 	}
355 	*q = '\0';
356 	yylval.y_real = atof(buf);
357 	t = REAL;
358     } else {
359 	*q = '\0';
360 	switch (base) {
361 	    case 10:
362 		yylval.y_int = atol(buf);
363 		break;
364 
365 	    case 8:
366 		yylval.y_int = octal(buf);
367 		break;
368 
369 	    case 16:
370 		yylval.y_int = hex(buf);
371 		break;
372 
373 	    default:
374 		badcaseval(base);
375 	}
376 	t = INT;
377     }
378     curchar = p;
379     return t;
380 }
381 
382 /*
383  * Convert a string of octal digits to an integer.
384  */
385 
386 private int octal(s)
387 String s;
388 {
389     register Char *p;
390     register Integer n;
391 
392     n = 0;
393     for (p = s; *p != '\0'; p++) {
394 	n = 8*n + (*p - '0');
395     }
396     return n;
397 }
398 
399 /*
400  * Convert a string of hexadecimal digits to an integer.
401  */
402 
403 private int hex(s)
404 String s;
405 {
406     register Char *p;
407     register Integer n;
408 
409     n = 0;
410     for (p = s; *p != '\0'; p++) {
411 	n *= 16;
412 	if (*p >= 'a' and *p <= 'f') {
413 	    n += (*p - 'a' + 10);
414 	} else if (*p >= 'A' and *p <= 'F') {
415 	    n += (*p - 'A' + 10);
416 	} else {
417 	    n += (*p - '0');
418 	}
419     }
420     return n;
421 }
422 
423 /*
424  * Scan a string.
425  */
426 
427 private Token getstring()
428 {
429     char buf[256];
430     register Char *p, *q;
431     Boolean endofstring;
432 
433     p = curchar;
434     q = buf;
435     endofstring = false;
436     while (not endofstring) {
437 	if (*p == '\n' or *p == '\0') {
438 	    error("non-terminated string");
439 	    endofstring = true;
440 	} else if (*p == '"' or *p == '\'') {
441 	    if (*(p+1) != *p) {
442 		endofstring = true;
443 	    } else {
444 		*q++ = *p;
445 	    }
446 	} else {
447 	    curchar = p;
448 	    *q++ = charcon(p);
449 	    p = curchar;
450 	}
451 	p++;
452     }
453     curchar = p;
454     *q = '\0';
455     yylval.y_string = strdup(buf);
456     return STRING;
457 }
458 
459 /*
460  * Process a character constant.
461  * Watch out for backslashes.
462  */
463 
464 private Char charcon(p)
465 char *p;
466 {
467     char c, buf[10], *q;
468 
469     if (*p == '\\') {
470 	++p;
471 	if (*p != '\\') {
472 	    q = buf;
473 	    do {
474 		*q++ = *p++;
475 	    } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
476 	    *q = '\0';
477 	    if (isdigit(buf[0])) {
478 		c = (Char) octal(buf);
479 	    } else {
480 		c = charlookup(buf);
481 	    }
482 	    curchar = p - 1;
483 	} else {
484 	    c = '\\';
485 	}
486     } else {
487 	c = *p;
488     }
489     return c;
490 }
491 
492 /*
493  * Do a lookup for a ASCII character name.
494  */
495 
496 private String ascii[] = {
497     "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
498     "BS",  "HT",  "NL",  "VT",  "NP",  "CR",  "SO",  "SI",
499     "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
500     "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
501     "SP", nil
502 };
503 
504 private char charlookup(s)
505 String s;
506 {
507     register int i;
508 
509     for (i = 0; ascii[i] != NULL; i++) {
510 	if (streq(s, ascii[i])) {
511 	    return i;
512 	}
513     }
514     if (streq(s, "DEL")) {
515 	return 0177;
516     }
517     error("unknown ascii name \"%s\"", s);
518     return '?';
519 }
520 
521 /*
522  * Input file management routines.
523  */
524 
525 public setinput(filename)
526 Filename filename;
527 {
528     File f;
529 
530     f = fopen(filename, "r");
531     if (f == nil) {
532 	error("can't open %s", filename);
533     } else {
534 	if (curinclindex >= MAXINCLDEPTH) {
535 	    error("unreasonable input nesting on \"%s\"", filename);
536 	}
537 	inclinfo[curinclindex].savefile = in;
538 	inclinfo[curinclindex].savefn = errfilename;
539 	inclinfo[curinclindex].savelineno = errlineno;
540 	curinclindex++;
541 	in = f;
542 	errfilename = filename;
543 	errlineno = 1;
544     }
545 }
546 
547 private Boolean eofinput()
548 {
549     register Boolean b;
550 
551     if (curinclindex == 0) {
552 	if (isterm(in)) {
553 	    putchar('\n');
554 	    clearerr(in);
555 	    b = false;
556 	} else {
557 	    b = true;
558 	}
559     } else {
560 	fclose(in);
561 	--curinclindex;
562 	in = inclinfo[curinclindex].savefile;
563 	errfilename = inclinfo[curinclindex].savefn;
564 	errlineno = inclinfo[curinclindex].savelineno;
565 	b = false;
566     }
567     return b;
568 }
569 
570 /*
571  * Pop the current input.  Return whether successful.
572  */
573 
574 public Boolean popinput()
575 {
576     Boolean b;
577 
578     if (curinclindex == 0) {
579 	b = false;
580     } else {
581 	b = (Boolean) (not eofinput());
582     }
583     return b;
584 }
585 
586 /*
587  * Return whether we are currently reading from standard input.
588  */
589 
590 public Boolean isstdin()
591 {
592     return (Boolean) (in == stdin);
593 }
594 
595 /*
596  * Send the current line to the shell.
597  */
598 
599 public shellline()
600 {
601     register char *p;
602 
603     p = curchar;
604     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
605 	++p;
606     }
607     shell(p);
608     if (*p == '\0' and isterm(in)) {
609 	putchar('\n');
610     }
611     erecover();
612 }
613 
614 /*
615  * Read the rest of the current line in "shell mode".
616  */
617 
618 public beginshellmode()
619 {
620     shellmode = true;
621 }
622 
623 /*
624  * Print out a token for debugging.
625  */
626 
627 public print_token(f, t)
628 File f;
629 Token t;
630 {
631     if (t == '\n') {
632 	fprintf(f, "char '\\n'");
633     } else if (t == EOF) {
634 	fprintf(f, "EOF");
635     } else if (t < 256) {
636 	fprintf(f, "char '%c'", t);
637     } else {
638 	fprintf(f, "\"%s\"", keywdstring(t));
639     }
640 }
641