xref: /csrg-svn/old/dbx/scanner.c (revision 11767)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static char sccsid[] = "@(#)scanner.c 1.5 03/30/83";
4 
5 /*
6  * Debugger scanner.
7  */
8 
9 #include "defs.h"
10 #include "scanner.h"
11 #include "main.h"
12 #include "keywords.h"
13 #include "tree.h"
14 #include "symbols.h"
15 #include "names.h"
16 #include "y.tab.h"
17 
18 #ifndef public
19 typedef int Token;
20 #endif
21 
22 public String initfile = ".dbxinit";
23 
24 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
25 
26 private Charclass class[256 + 1];
27 private Charclass *lexclass = class + 1;
28 
29 #define isdigit(c) (lexclass[c] == NUM)
30 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
31 #define ishexdigit(c) ( \
32     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
33 )
34 
35 #define MAXLINESIZE 1024
36 
37 private File in;
38 private Char linebuf[MAXLINESIZE];
39 private Char *curchar;
40 
41 #define MAXINCLDEPTH 10
42 
43 private struct {
44     File savefile;
45     Filename savefn;
46     int savelineno;
47 } inclinfo[MAXINCLDEPTH];
48 
49 private unsigned int curinclindex;
50 
51 private Token getident();
52 private Token getnum();
53 private Token getstring();
54 private Boolean eofinput();
55 private Char charcon();
56 private Char charlookup();
57 
58 private enterlexclass(class, s)
59 Charclass class;
60 String s;
61 {
62     register char *p;
63 
64     for (p = s; *p != '\0'; p++) {
65 	lexclass[*p] = class;
66     }
67 }
68 
69 public scanner_init()
70 {
71     register Integer i;
72 
73     for (i = 0; i < 257; i++) {
74 	class[i] = OTHER;
75     }
76     enterlexclass(WHITE, " \t");
77     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
78     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
79     enterlexclass(NUM, "0123456789");
80     in = stdin;
81     errfilename = nil;
82     errlineno = 0;
83     curchar = linebuf;
84     linebuf[0] = '\0';
85 }
86 
87 /*
88  * Read a single token.
89  *
90  * Input is line buffered.
91  *
92  * There are two "modes" of operation:  one as in a compiler,
93  * and one for reading shell-like syntax.
94  */
95 
96 private Boolean shellmode;
97 
98 public Token yylex()
99 {
100     register int c;
101     register char *p;
102     register Token t;
103     String line;
104 
105     p = curchar;
106     if (*p == '\0') {
107 	do {
108 	    if (isterm(in)) {
109 		printf("> ");
110 		fflush(stdout);
111 	    }
112 	    line = fgets(linebuf, MAXLINESIZE, in);
113 	} while (line == nil and not eofinput());
114 	if (line == nil) {
115 	    c = EOF;
116 	} else {
117 	    p = linebuf;
118 	    while (lexclass[*p] == WHITE) {
119 		p++;
120 	    }
121 	    shellmode = false;
122 	}
123     } else {
124 	while (lexclass[*p] == WHITE) {
125 	    p++;
126 	}
127     }
128     curchar = p;
129     c = *p;
130     if (lexclass[c] == ALPHA) {
131 	t = getident();
132     } else if (lexclass[c] == NUM) {
133 	t = getnum();
134     } else {
135 	++curchar;
136 	switch (c) {
137 	    case '\n':
138 		t = '\n';
139 		if (errlineno != 0) {
140 		    errlineno++;
141 		}
142 		break;
143 
144 	    case '"':
145 	    case '\'':
146 		t = getstring();
147 		break;
148 
149 	    case '.':
150 		if (shellmode) {
151 		    --curchar;
152 		    t = getident();
153 		} else if (isdigit(*curchar)) {
154 		    --curchar;
155 		    t = getnum();
156 		} else {
157 		    t = '.';
158 		}
159 		break;
160 
161 	    case '<':
162 		if (not shellmode and *curchar == '<') {
163 		    ++curchar;
164 		    t = LFORMER;
165 		} else {
166 		    t = '<';
167 		}
168 		break;
169 
170 	    case '>':
171 		if (not shellmode and *curchar == '>') {
172 		    ++curchar;
173 		    t = RFORMER;
174 		} else {
175 		    t = '>';
176 		}
177 		break;
178 
179 	    case '#':
180 		if (*curchar == '^') {
181 		    ++curchar;
182 		    t = ABSTRACTION;
183 		} else {
184 		    t = '#';
185 		}
186 		break;
187 
188 	    case '-':
189 		if (shellmode) {
190 		    --curchar;
191 		    t = getident();
192 		} else if (*curchar == '>') {
193 		    ++curchar;
194 		    t = ARROW;
195 		} else {
196 		    t = '-';
197 		}
198 		break;
199 
200 	    case EOF:
201 		t = 0;
202 		break;
203 
204 	    default:
205 		if (shellmode and index("!&*()[]", c) == nil) {
206 		    --curchar;
207 		    t = getident();
208 		} else {
209 		    t = c;
210 		}
211 		break;
212 	}
213     }
214 #   ifdef LEXDEBUG
215 	if (lexdebug) {
216 	    fprintf(stderr, "yylex returns ");
217 	    print_token(stderr, t);
218 	    fprintf(stderr, "\n");
219 	}
220 #   endif
221     return t;
222 }
223 
224 /*
225  * Parser error handling.
226  */
227 
228 public yyerror(s)
229 String s;
230 {
231     register Char *p, *tokenbegin, *tokenend;
232     register Integer len;
233 
234     if (streq(s, "syntax error")) {
235 	beginerrmsg();
236 	tokenend = curchar - 1;
237 	tokenbegin = tokenend;
238 	while (lexclass[*tokenbegin] != WHITE and tokenbegin > &linebuf[0]) {
239 	    --tokenbegin;
240 	}
241 	len = tokenend - tokenbegin + 1;
242 	p = tokenbegin;
243 	if (p > &linebuf[0]) {
244 	    while (lexclass[*p] == WHITE and p > &linebuf[0]) {
245 		--p;
246 	    }
247 	}
248 	if (p == &linebuf[0]) {
249 	    fprintf(stderr, "unrecognized command \"%.*s\"", len, tokenbegin);
250 	} else {
251 	    fprintf(stderr, "syntax error");
252 	    if (len != 0) {
253 		fprintf(stderr, " on \"%.*s\"", len, tokenbegin);
254 	    }
255 	}
256 	enderrmsg();
257     } else {
258 	error(s);
259     }
260 }
261 
262 /*
263  * Eat the current line.
264  */
265 
266 public gobble()
267 {
268     curchar = linebuf;
269     linebuf[0] = '\0';
270 }
271 
272 /*
273  * Scan an identifier and check to see if it's a keyword.
274  */
275 
276 private Token getident()
277 {
278     char buf[256];
279     register Char *p, *q;
280     register Token t;
281 
282     p = curchar;
283     q = buf;
284     if (shellmode) {
285 	do {
286 	    *q++ = *p++;
287 	} while (index(" \t\n!&<>*[]()", *p) == nil);
288     } else {
289 	do {
290 	    *q++ = *p++;
291 	} while (isalnum(*p));
292     }
293     curchar = p;
294     *q = '\0';
295     yylval.y_name = identname(buf, false);
296     if (not shellmode) {
297 	t = findkeyword(yylval.y_name);
298 	if (t == nil) {
299 	    t = NAME;
300 	}
301     } else {
302 	t = NAME;
303     }
304     return t;
305 }
306 
307 /*
308  * Scan a number.
309  */
310 
311 private Token getnum()
312 {
313     char buf[256];
314     register Char *p, *q;
315     register Token t;
316     Integer base;
317 
318     p = curchar;
319     q = buf;
320     if (*p == '0') {
321 	if (*(p+1) == 'x') {
322 	    p += 2;
323 	    base = 16;
324 	} else {
325 	    base = 8;
326 	}
327     } else {
328 	base = 10;
329     }
330     if (base == 16) {
331 	do {
332 	    *q++ = *p++;
333 	} while (ishexdigit(*p));
334     } else {
335 	do {
336 	    *q++ = *p++;
337 	} while (isdigit(*p));
338     }
339     if (*p == '.') {
340 	do {
341 	    *q++ = *p++;
342 	} while (isdigit(*p));
343 	if (*p == 'e' or *p == 'E') {
344 	    p++;
345 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
346 		*q++ = 'e';
347 		do {
348 		    *q++ = *p++;
349 		} while (isdigit(*p));
350 	    }
351 	}
352 	*q = '\0';
353 	yylval.y_real = atof(buf);
354 	t = REAL;
355     } else {
356 	*q = '\0';
357 	switch (base) {
358 	    case 10:
359 		yylval.y_int = atol(buf);
360 		break;
361 
362 	    case 8:
363 		yylval.y_int = octal(buf);
364 		break;
365 
366 	    case 16:
367 		yylval.y_int = hex(buf);
368 		break;
369 
370 	    default:
371 		badcaseval(base);
372 	}
373 	t = INT;
374     }
375     curchar = p;
376     return t;
377 }
378 
379 /*
380  * Convert a string of octal digits to an integer.
381  */
382 
383 private int octal(s)
384 String s;
385 {
386     register Char *p;
387     register Integer n;
388 
389     n = 0;
390     for (p = s; *p != '\0'; p++) {
391 	n = 8*n + (*p - '0');
392     }
393     return n;
394 }
395 
396 /*
397  * Convert a string of hexadecimal digits to an integer.
398  */
399 
400 private int hex(s)
401 String s;
402 {
403     register Char *p;
404     register Integer n;
405 
406     n = 0;
407     for (p = s; *p != '\0'; p++) {
408 	n *= 16;
409 	if (*p >= 'a' and *p <= 'f') {
410 	    n += (*p - 'a' + 10);
411 	} else if (*p >= 'A' and *p <= 'F') {
412 	    n += (*p - 'A' + 10);
413 	} else {
414 	    n += (*p - '0');
415 	}
416     }
417     return n;
418 }
419 
420 /*
421  * Scan a string.
422  */
423 
424 private Token getstring()
425 {
426     char buf[256];
427     register Char *p, *q;
428     Boolean endofstring;
429 
430     p = curchar;
431     q = buf;
432     endofstring = false;
433     while (not endofstring) {
434 	if (*p == '\n' or *p == '\0') {
435 	    error("non-terminated string");
436 	    endofstring = true;
437 	} else if (*p == '"' or *p == '\'') {
438 	    if (*(p+1) != *p) {
439 		endofstring = true;
440 	    } else {
441 		*q++ = *p;
442 	    }
443 	} else {
444 	    *q++ = charcon(p);
445 	    p = curchar;
446 	}
447 	p++;
448     }
449     curchar = p;
450     *q = '\0';
451     yylval.y_string = strdup(buf);
452     return STRING;
453 }
454 
455 /*
456  * Process a character constant.
457  * Watch out for backslashes.
458  */
459 
460 private Char charcon(p)
461 char *p;
462 {
463     char c, buf[10], *q;
464 
465     if (*p == '\\') {
466 	++p;
467 	if (*p != '\\') {
468 	    q = buf;
469 	    do {
470 		*q++ = *p++;
471 	    } while (*p != '\\' and *p != '\'' and *p != '\n' and *p != '\0');
472 	    *q = '\0';
473 	    if (isdigit(buf[0])) {
474 		c = (Char) octal(buf);
475 	    } else {
476 		c = charlookup(buf);
477 	    }
478 	    curchar = p - 1;
479 	} else {
480 	    c = '\\';
481 	}
482     } else {
483 	c = *p;
484     }
485     return c;
486 }
487 
488 /*
489  * Do a lookup for a ASCII character name.
490  */
491 
492 private String ascii[] = {
493     "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
494     "BS",  "HT",  "NL",  "VT",  "NP",  "CR",  "SO",  "SI",
495     "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
496     "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
497     "SP", nil
498 };
499 
500 private char charlookup(s)
501 String s;
502 {
503     register int i;
504 
505     for (i = 0; ascii[i] != NULL; i++) {
506 	if (streq(s, ascii[i])) {
507 	    return i;
508 	}
509     }
510     if (streq(s, "DEL")) {
511 	return 0177;
512     }
513     error("unknown ascii name \"%s\"", s);
514     return '?';
515 }
516 
517 /*
518  * Input file management routines.
519  */
520 
521 public setinput(filename)
522 Filename filename;
523 {
524     File f;
525 
526     f = fopen(filename, "r");
527     if (f == nil) {
528 	error("can't open %s", filename);
529     } else {
530 	if (curinclindex >= MAXINCLDEPTH) {
531 	    error("unreasonable input nesting on \"%s\"", filename);
532 	}
533 	inclinfo[curinclindex].savefile = in;
534 	inclinfo[curinclindex].savefn = errfilename;
535 	inclinfo[curinclindex].savelineno = errlineno;
536 	curinclindex++;
537 	in = f;
538 	errfilename = filename;
539 	errlineno = 1;
540     }
541 }
542 
543 private Boolean eofinput()
544 {
545     register Boolean b;
546 
547     if (curinclindex == 0) {
548 	if (isterm(in)) {
549 	    putchar('\n');
550 	    b = false;
551 	} else {
552 	    b = true;
553 	}
554     } else {
555 	fclose(in);
556 	--curinclindex;
557 	in = inclinfo[curinclindex].savefile;
558 	errfilename = inclinfo[curinclindex].savefn;
559 	errlineno = inclinfo[curinclindex].savelineno;
560 	b = false;
561     }
562     return b;
563 }
564 
565 /*
566  * Pop the current input.  Return whether successful.
567  */
568 
569 public Boolean popinput()
570 {
571     Boolean b;
572 
573     if (curinclindex == 0) {
574 	b = false;
575     } else {
576 	b = (Boolean) (not eofinput());
577     }
578     return b;
579 }
580 
581 /*
582  * Return whether we are currently reading from standard input.
583  */
584 
585 public Boolean isstdin()
586 {
587     return (Boolean) (in == stdin);
588 }
589 
590 /*
591  * Send the current line to the shell.
592  */
593 
594 public shellline()
595 {
596     register char *p;
597 
598     p = curchar;
599     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
600 	++p;
601     }
602     shell(p);
603     if (*p == '\0' and isterm(in)) {
604 	putchar('\n');
605     }
606     erecover();
607 }
608 
609 /*
610  * Read the rest of the current line in "shell mode".
611  */
612 
613 public beginshellmode()
614 {
615     shellmode = true;
616 }
617 
618 /*
619  * Print out a token for debugging.
620  */
621 
622 public print_token(f, t)
623 File f;
624 Token t;
625 {
626     if (t == '\n') {
627 	fprintf(f, "char '\\n'");
628     } else if (t == EOF) {
629 	fprintf(f, "EOF");
630     } else if (t < 256) {
631 	fprintf(f, "char '%c'", t);
632     } else {
633 	fprintf(f, "\"%s\"", keywdstring(t));
634     }
635 }
636