xref: /csrg-svn/old/dbx/scanner.c (revision 16637)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static	char sccsid[] = "@(#)scanner.c	1.9 (Berkeley) 06/23/84";
4 
5 /*
6  * Debugger scanner.
7  */
8 
9 #include "defs.h"
10 #include "scanner.h"
11 #include "main.h"
12 #include "keywords.h"
13 #include "tree.h"
14 #include "symbols.h"
15 #include "names.h"
16 #include "y.tab.h"
17 
18 #ifndef public
19 typedef int Token;
20 #endif
21 
22 public String initfile = ".dbxinit";
23 
24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25 
26 private Charclass class[256 + 1];
27 private Charclass *lexclass = class + 1;
28 
29 #define isdigit(c) (lexclass[c] == NUM)
30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31 #define ishexdigit(c) ( \
32     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33 )
34 
35 #define MAXLINESIZE 1024
36 
37 private File in;
38 private Char linebuf[MAXLINESIZE];
39 private Char *curchar, *prevchar;
40 
41 #define MAXINCLDEPTH 10
42 
43 private struct {
44     File savefile;
45     Filename savefn;
46     int savelineno;
47 } inclinfo[MAXINCLDEPTH];
48 
49 private unsigned int curinclindex;
50 
51 private Token getident();
52 private Token getnum();
53 private Token getstring();
54 private Boolean eofinput();
55 private Char charcon();
56 private Char charlookup();
57 
58 private enterlexclass(class, s)
59 Charclass class;
60 String s;
61 {
62     register char *p;
63 
64     for (p = s; *p != '\0'; p++) {
65 	lexclass[*p] = class;
66     }
67 }
68 
69 public scanner_init()
70 {
71     register Integer i;
72 
73     for (i = 0; i < 257; i++) {
74 	class[i] = OTHER;
75     }
76     enterlexclass(WHITE, " \t");
77     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
78     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
79     enterlexclass(NUM, "0123456789");
80     in = stdin;
81     errfilename = nil;
82     errlineno = 0;
83     curchar = linebuf;
84     linebuf[0] = '\0';
85 }
86 
87 /*
88  * Read a single token.
89  *
90  * Input is line buffered.
91  *
92  * There are two "modes" of operation:  one as in a compiler,
93  * and one for reading shell-like syntax.
94  */
95 
96 private Boolean shellmode;
97 
98 public Token yylex()
99 {
100     register int c;
101     register char *p;
102     register Token t;
103     String line;
104 
105     p = curchar;
106     if (*p == '\0') {
107 	do {
108 	    if (isterm(in)) {
109 		printf("(%s) ", cmdname);
110 		fflush(stdout);
111 	    }
112 	    line = fgets(linebuf, MAXLINESIZE, in);
113 	} while (line == nil and not eofinput());
114 	if (line == nil) {
115 	    c = EOF;
116 	} else {
117 	    p = linebuf;
118 	    while (lexclass[*p] == WHITE) {
119 		p++;
120 	    }
121 	    shellmode = false;
122 	}
123     } else {
124 	while (lexclass[*p] == WHITE) {
125 	    p++;
126 	}
127     }
128     curchar = p;
129     prevchar = curchar;
130     c = *p;
131     if (lexclass[c] == ALPHA) {
132 	t = getident();
133     } else if (lexclass[c] == NUM) {
134 	if (shellmode) {
135 	    t = getident();
136 	} else {
137 	    t = getnum();
138 	}
139     } else {
140 	++curchar;
141 	switch (c) {
142 	    case '\n':
143 		t = '\n';
144 		if (errlineno != 0) {
145 		    errlineno++;
146 		}
147 		break;
148 
149 	    case '"':
150 	    case '\'':
151 		t = getstring();
152 		break;
153 
154 	    case '.':
155 		if (shellmode) {
156 		    --curchar;
157 		    t = getident();
158 		} else if (isdigit(*curchar)) {
159 		    --curchar;
160 		    t = getnum();
161 		} else {
162 		    t = '.';
163 		}
164 		break;
165 
166 	    case '<':
167 		if (not shellmode and *curchar == '<') {
168 		    ++curchar;
169 		    t = LFORMER;
170 		} else {
171 		    t = '<';
172 		}
173 		break;
174 
175 	    case '>':
176 		if (not shellmode and *curchar == '>') {
177 		    ++curchar;
178 		    t = RFORMER;
179 		} else {
180 		    t = '>';
181 		}
182 		break;
183 
184 	    case '#':
185 		if (*curchar == '^') {
186 		    ++curchar;
187 		    t = ABSTRACTION;
188 		} else {
189 		    t = '#';
190 		}
191 		break;
192 
193 	    case '-':
194 		if (shellmode) {
195 		    --curchar;
196 		    t = getident();
197 		} else if (*curchar == '>') {
198 		    ++curchar;
199 		    t = ARROW;
200 		} else {
201 		    t = '-';
202 		}
203 		break;
204 
205 	    case EOF:
206 		t = 0;
207 		break;
208 
209 	    default:
210 		if (shellmode and index("!&*()[]", c) == nil) {
211 		    --curchar;
212 		    t = getident();
213 		} else {
214 		    t = c;
215 		}
216 		break;
217 	}
218     }
219 #   ifdef LEXDEBUG
220 	if (lexdebug) {
221 	    fprintf(stderr, "yylex returns ");
222 	    print_token(stderr, t);
223 	    fprintf(stderr, "\n");
224 	}
225 #   endif
226     return t;
227 }
228 
229 /*
230  * Parser error handling.
231  */
232 
233 public yyerror(s)
234 String s;
235 {
236     register char *p;
237     register integer start;
238 
239     if (streq(s, "syntax error")) {
240 	beginerrmsg();
241 	p = prevchar;
242 	start = p - &linebuf[0];
243 	if (p > &linebuf[0]) {
244 	    while (lexclass[*p] == WHITE and p > &linebuf[0]) {
245 		--p;
246 	    }
247 	}
248 	fprintf(stderr, "%s", linebuf);
249 	if (start != 0) {
250 	    fprintf(stderr, "%*c", start, ' ');
251 	}
252 	if (p == &linebuf[0]) {
253 	    fprintf(stderr, "^ unrecognized command");
254 	} else {
255 	    fprintf(stderr, "^ syntax error");
256 	}
257 	enderrmsg();
258     } else {
259 	error(s);
260     }
261 }
262 
263 /*
264  * Eat the current line.
265  */
266 
267 public gobble()
268 {
269     curchar = linebuf;
270     linebuf[0] = '\0';
271 }
272 
273 /*
274  * Scan an identifier and check to see if it's a keyword.
275  */
276 
277 private Token getident()
278 {
279     char buf[256];
280     register Char *p, *q;
281     register Token t;
282 
283     p = curchar;
284     q = buf;
285     if (shellmode) {
286 	do {
287 	    *q++ = *p++;
288 	} while (index(" \t\n!&<>*[]()'\"", *p) == nil);
289     } else {
290 	do {
291 	    *q++ = *p++;
292 	} while (isalnum(*p));
293     }
294     curchar = p;
295     *q = '\0';
296     yylval.y_name = identname(buf, false);
297     if (not shellmode) {
298 	t = findkeyword(yylval.y_name);
299 	if (t == nil) {
300 	    t = NAME;
301 	}
302     } else {
303 	t = NAME;
304     }
305     return t;
306 }
307 
308 /*
309  * Scan a number.
310  */
311 
312 private Token getnum()
313 {
314     char buf[256];
315     register Char *p, *q;
316     register Token t;
317     Integer base;
318 
319     p = curchar;
320     q = buf;
321     if (*p == '0') {
322 	if (*(p+1) == 'x') {
323 	    p += 2;
324 	    base = 16;
325 	} else {
326 	    base = 8;
327 	}
328     } else {
329 	base = 10;
330     }
331     if (base == 16) {
332 	do {
333 	    *q++ = *p++;
334 	} while (ishexdigit(*p));
335     } else {
336 	do {
337 	    *q++ = *p++;
338 	} while (isdigit(*p));
339     }
340     if (*p == '.') {
341 	do {
342 	    *q++ = *p++;
343 	} while (isdigit(*p));
344 	if (*p == 'e' or *p == 'E') {
345 	    p++;
346 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
347 		*q++ = 'e';
348 		do {
349 		    *q++ = *p++;
350 		} while (isdigit(*p));
351 	    }
352 	}
353 	*q = '\0';
354 	yylval.y_real = atof(buf);
355 	t = REAL;
356     } else {
357 	*q = '\0';
358 	switch (base) {
359 	    case 10:
360 		yylval.y_int = atol(buf);
361 		break;
362 
363 	    case 8:
364 		yylval.y_int = octal(buf);
365 		break;
366 
367 	    case 16:
368 		yylval.y_int = hex(buf);
369 		break;
370 
371 	    default:
372 		badcaseval(base);
373 	}
374 	t = INT;
375     }
376     curchar = p;
377     return t;
378 }
379 
380 /*
381  * Convert a string of octal digits to an integer.
382  */
383 
384 private int octal(s)
385 String s;
386 {
387     register Char *p;
388     register Integer n;
389 
390     n = 0;
391     for (p = s; *p != '\0'; p++) {
392 	n = 8*n + (*p - '0');
393     }
394     return n;
395 }
396 
397 /*
398  * Convert a string of hexadecimal digits to an integer.
399  */
400 
401 private int hex(s)
402 String s;
403 {
404     register Char *p;
405     register Integer n;
406 
407     n = 0;
408     for (p = s; *p != '\0'; p++) {
409 	n *= 16;
410 	if (*p >= 'a' and *p <= 'f') {
411 	    n += (*p - 'a' + 10);
412 	} else if (*p >= 'A' and *p <= 'F') {
413 	    n += (*p - 'A' + 10);
414 	} else {
415 	    n += (*p - '0');
416 	}
417     }
418     return n;
419 }
420 
421 /*
422  * Scan a string.
423  */
424 
425 private Token getstring()
426 {
427     char buf[256];
428     register Char *p, *q;
429     Boolean endofstring;
430 
431     p = curchar;
432     q = buf;
433     endofstring = false;
434     while (not endofstring) {
435 	if (*p == '\n' or *p == '\0') {
436 	    error("non-terminated string");
437 	    endofstring = true;
438 	} else if (*p == '"' or *p == '\'') {
439 	    if (*(p+1) != *p) {
440 		endofstring = true;
441 	    } else {
442 		*q++ = *p;
443 	    }
444 	} else {
445 	    curchar = p;
446 	    *q++ = charcon(p);
447 	    p = curchar;
448 	}
449 	p++;
450     }
451     curchar = p;
452     *q = '\0';
453     yylval.y_string = strdup(buf);
454     return STRING;
455 }
456 
457 /*
458  * Process a character constant.
459  * Watch out for backslashes.
460  */
461 
462 private Char charcon(p)
463 char *p;
464 {
465     char c, buf[10], *q;
466 
467     if (*p == '\\') {
468 	++p;
469 	if (*p != '\\') {
470 	    q = buf;
471 	    do {
472 		*q++ = *p++;
473 	    } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
474 	    *q = '\0';
475 	    if (isdigit(buf[0])) {
476 		c = (Char) octal(buf);
477 	    } else {
478 		c = charlookup(buf);
479 	    }
480 	    curchar = p - 1;
481 	} else {
482 	    c = '\\';
483 	}
484     } else {
485 	c = *p;
486     }
487     return c;
488 }
489 
490 /*
491  * Do a lookup for a ASCII character name.
492  */
493 
494 private String ascii[] = {
495     "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
496     "BS",  "HT",  "NL",  "VT",  "NP",  "CR",  "SO",  "SI",
497     "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
498     "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
499     "SP", nil
500 };
501 
502 private char charlookup(s)
503 String s;
504 {
505     register int i;
506 
507     for (i = 0; ascii[i] != NULL; i++) {
508 	if (streq(s, ascii[i])) {
509 	    return i;
510 	}
511     }
512     if (streq(s, "DEL")) {
513 	return 0177;
514     }
515     error("unknown ascii name \"%s\"", s);
516     return '?';
517 }
518 
519 /*
520  * Input file management routines.
521  */
522 
523 public setinput(filename)
524 Filename filename;
525 {
526     File f;
527 
528     f = fopen(filename, "r");
529     if (f == nil) {
530 	error("can't open %s", filename);
531     } else {
532 	if (curinclindex >= MAXINCLDEPTH) {
533 	    error("unreasonable input nesting on \"%s\"", filename);
534 	}
535 	inclinfo[curinclindex].savefile = in;
536 	inclinfo[curinclindex].savefn = errfilename;
537 	inclinfo[curinclindex].savelineno = errlineno;
538 	curinclindex++;
539 	in = f;
540 	errfilename = filename;
541 	errlineno = 1;
542     }
543 }
544 
545 private Boolean eofinput()
546 {
547     register Boolean b;
548 
549     if (curinclindex == 0) {
550 	if (isterm(in)) {
551 	    putchar('\n');
552 	    clearerr(in);
553 	    b = false;
554 	} else {
555 	    b = true;
556 	}
557     } else {
558 	fclose(in);
559 	--curinclindex;
560 	in = inclinfo[curinclindex].savefile;
561 	errfilename = inclinfo[curinclindex].savefn;
562 	errlineno = inclinfo[curinclindex].savelineno;
563 	b = false;
564     }
565     return b;
566 }
567 
568 /*
569  * Pop the current input.  Return whether successful.
570  */
571 
572 public Boolean popinput()
573 {
574     Boolean b;
575 
576     if (curinclindex == 0) {
577 	b = false;
578     } else {
579 	b = (Boolean) (not eofinput());
580     }
581     return b;
582 }
583 
584 /*
585  * Return whether we are currently reading from standard input.
586  */
587 
588 public Boolean isstdin()
589 {
590     return (Boolean) (in == stdin);
591 }
592 
593 /*
594  * Send the current line to the shell.
595  */
596 
597 public shellline()
598 {
599     register char *p;
600 
601     p = curchar;
602     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
603 	++p;
604     }
605     shell(p);
606     if (*p == '\0' and isterm(in)) {
607 	putchar('\n');
608     }
609     erecover();
610 }
611 
612 /*
613  * Read the rest of the current line in "shell mode".
614  */
615 
616 public beginshellmode()
617 {
618     shellmode = true;
619 }
620 
621 /*
622  * Print out a token for debugging.
623  */
624 
625 public print_token(f, t)
626 File f;
627 Token t;
628 {
629     if (t == '\n') {
630 	fprintf(f, "char '\\n'");
631     } else if (t == EOF) {
632 	fprintf(f, "EOF");
633     } else if (t < 256) {
634 	fprintf(f, "char '%c'", t);
635     } else {
636 	fprintf(f, "\"%s\"", keywdstring(t));
637     }
638 }
639