xref: /csrg-svn/old/dbx/scanner.c (revision 18232)
1 /* Copyright (c) 1982 Regents of the University of California */
2 
3 static char sccsid[] = "@(#)scanner.c	1.12 (Berkeley) 03/01/85";
4 
5 static char rcsid[] = "$Header: scanner.c,v 1.5 84/12/26 10:42:05 linton Exp $";
6 
7 /*
8  * Debugger scanner.
9  */
10 
11 #include "defs.h"
12 #include "scanner.h"
13 #include "main.h"
14 #include "keywords.h"
15 #include "tree.h"
16 #include "symbols.h"
17 #include "names.h"
18 #include "y.tab.h"
19 
20 #ifndef public
21 typedef int Token;
22 
23 #define MAXLINESIZE 10240
24 
25 #endif
26 
27 public String initfile = ".dbxinit";
28 
29 typedef enum { WHITE, ALPHA, NUM, OTHER } Charclass;
30 
31 private Charclass class[256 + 1];
32 private Charclass *lexclass = class + 1;
33 
34 #define isdigit(c) (lexclass[c] == NUM)
35 #define isalnum(c) (lexclass[c] == ALPHA or lexclass[c] == NUM)
36 #define ishexdigit(c) ( \
37     isdigit(c) or (c >= 'a' and c <= 'f') or (c >= 'A' and c <= 'F') \
38 )
39 
40 public boolean chkalias;
41 public char scanner_linebuf[MAXLINESIZE];
42 
43 private File in;
44 private char *curchar, *prevchar;
45 
46 #define MAXINCLDEPTH 10
47 
48 private struct {
49     File savefile;
50     Filename savefn;
51     int savelineno;
52 } inclinfo[MAXINCLDEPTH];
53 
54 private unsigned int curinclindex;
55 
56 private Token getident();
57 private Token getnum();
58 private Token getstring();
59 private Boolean eofinput();
60 private char charcon();
61 
62 private enterlexclass(class, s)
63 Charclass class;
64 String s;
65 {
66     register char *p;
67 
68     for (p = s; *p != '\0'; p++) {
69 	lexclass[*p] = class;
70     }
71 }
72 
73 public scanner_init()
74 {
75     register Integer i;
76 
77     for (i = 0; i < 257; i++) {
78 	class[i] = OTHER;
79     }
80     enterlexclass(WHITE, " \t");
81     enterlexclass(ALPHA, "abcdefghijklmnopqrstuvwxyz");
82     enterlexclass(ALPHA, "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$");
83     enterlexclass(NUM, "0123456789");
84     in = stdin;
85     errfilename = nil;
86     errlineno = 0;
87     curchar = scanner_linebuf;
88     scanner_linebuf[0] = '\0';
89     chkalias = true;
90 }
91 
92 /*
93  * Read a single token.
94  *
95  * The input is line buffered.  Tokens cannot cross line boundaries.
96  *
97  * There are two "modes" of operation:  one as in a compiler,
98  * and one for reading shell-like syntax.  In the first mode
99  * there is the additional choice of doing alias processing.
100  */
101 
102 private Boolean shellmode;
103 
104 public Token yylex()
105 {
106     register int c;
107     register char *p;
108     register Token t;
109     String line;
110     integer n;
111 
112     p = curchar;
113     if (*p == '\0') {
114 	do {
115 	    if (isterm(in)) {
116 		printf("(%s) ", cmdname);
117 	    }
118 	    fflush(stdout);
119 	    line = fgets(scanner_linebuf, MAXLINESIZE, in);
120 	} while (line == nil and not eofinput());
121 	if (line == nil) {
122 	    c = EOF;
123 	} else {
124 	    p = scanner_linebuf;
125 	    while (lexclass[*p] == WHITE) {
126 		p++;
127 	    }
128 	    shellmode = false;
129 	}
130 	chkalias = true;
131     } else {
132 	while (lexclass[*p] == WHITE) {
133 	    p++;
134 	}
135     }
136     curchar = p;
137     prevchar = curchar;
138     c = *p;
139     if (lexclass[c] == ALPHA) {
140 	t = getident(chkalias);
141     } else if (lexclass[c] == NUM) {
142 	if (shellmode) {
143 	    t = getident(chkalias);
144 	} else {
145 	    t = getnum();
146 	}
147     } else {
148 	++curchar;
149 	switch (c) {
150 	    case '\n':
151 		t = '\n';
152 		if (errlineno != 0) {
153 		    errlineno++;
154 		}
155 		break;
156 
157 	    case '"':
158 	    case '\'':
159 		t = getstring(c);
160 		break;
161 
162 	    case '.':
163 		if (shellmode) {
164 		    --curchar;
165 		    t = getident(chkalias);
166 		} else if (isdigit(*curchar)) {
167 		    --curchar;
168 		    t = getnum();
169 		} else {
170 		    t = '.';
171 		}
172 		break;
173 
174 	    case '-':
175 		if (shellmode) {
176 		    --curchar;
177 		    t = getident(chkalias);
178 		} else if (*curchar == '>') {
179 		    ++curchar;
180 		    t = ARROW;
181 		} else {
182 		    t = '-';
183 		}
184 		break;
185 
186 	    case '#':
187 		if (not isterm(in)) {
188 		    *p = '\0';
189 		    curchar = p;
190 		    t = '\n';
191 		    ++errlineno;
192 		} else {
193 		    t = '#';
194 		}
195 		break;
196 
197 	    case '\\':
198 		if (*(p+1) == '\n') {
199 		    n = MAXLINESIZE - (p - &scanner_linebuf[0]);
200 		    if (n > 1) {
201 			if (fgets(p, n, in) == nil) {
202 			    t = 0;
203 			} else {
204 			    curchar = p;
205 			    t = yylex();
206 			}
207 		    } else {
208 			t = '\\';
209 		    }
210 		} else {
211 		    t = '\\';
212 		}
213 		break;
214 
215 	    case EOF:
216 		t = 0;
217 		break;
218 
219 	    default:
220 		if (shellmode and index("!&*<>()[]", c) == nil) {
221 		    --curchar;
222 		    t = getident(chkalias);
223 		} else {
224 		    t = c;
225 		}
226 		break;
227 	}
228     }
229     chkalias = false;
230 #   ifdef LEXDEBUG
231 	if (lexdebug) {
232 	    fprintf(stderr, "yylex returns ");
233 	    print_token(stderr, t);
234 	    fprintf(stderr, "\n");
235 	}
236 #   endif
237     return t;
238 }
239 
240 /*
241  * Put the given string before the current character
242  * in the current line, thus inserting it into the input stream.
243  */
244 
245 public insertinput (s)
246 String s;
247 {
248     register char *p, *q;
249     int need, avail, shift;
250 
251     q = s;
252     need = strlen(q);
253     avail = curchar - &scanner_linebuf[0];
254     if (need <= avail) {
255 	curchar = &scanner_linebuf[avail - need];
256 	p = curchar;
257 	while (*q != '\0') {
258 	    *p++ = *q++;
259 	}
260     } else {
261 	p = curchar;
262 	while (*p != '\0') {
263 	    ++p;
264 	}
265 	shift = need - avail;
266 	if (p + shift >= &scanner_linebuf[MAXLINESIZE]) {
267 	    error("alias expansion too large");
268 	}
269 	for (;;) {
270 	    *(p + shift) = *p;
271 	    if (p == curchar) {
272 		break;
273 	    }
274 	    --p;
275 	}
276 	p = &scanner_linebuf[0];
277 	while (*q != '\0') {
278 	    *p++ = *q++;
279 	}
280 	curchar = &scanner_linebuf[0];
281     }
282 }
283 
284 /*
285  * Get the actuals for a macro call.
286  */
287 
288 private String movetochar (str, c)
289 String str;
290 char c;
291 {
292     register char *p;
293 
294     while (*p != c) {
295 	if (*p == '\0') {
296 	    error("missing ')' in macro call");
297 	} else if (*p == ')') {
298 	    error("not enough parameters in macro call");
299 	} else if (*p == ',') {
300 	    error("too many parameters in macro call");
301 	}
302 	++p;
303     }
304     return p;
305 }
306 
307 private String *getactuals (n)
308 integer n;
309 {
310     String *a;
311     register char *p;
312     int i;
313 
314     a = newarr(String, n);
315     p = curchar;
316     while (*p != '(') {
317 	if (lexclass[*p] != WHITE) {
318 	    error("missing actuals for macro");
319 	}
320 	++p;
321     }
322     ++p;
323     for (i = 0; i < n - 1; i++) {
324 	a[i] = p;
325 	p = movetochar(p, ',');
326 	*p = '\0';
327 	++p;
328     }
329     a[n-1] = p;
330     p = movetochar(p, ')');
331     *p = '\0';
332     curchar = p + 1;
333     return a;
334 }
335 
336 /*
337  * Do command macro expansion, assuming curchar points to the beginning
338  * of the actuals, and we are not in shell mode.
339  */
340 
341 private expand (pl, str)
342 List pl;
343 String str;
344 {
345     char buf[4096], namebuf[100];
346     register char *p, *q, *r;
347     String *actual;
348     Name n;
349     integer i;
350     boolean match;
351 
352     if (pl == nil) {
353 	insertinput(str);
354     } else {
355 	actual = getactuals(list_size(pl));
356 	p = buf;
357 	q = str;
358 	while (*q != '\0') {
359 	    if (p >= &buf[4096]) {
360 		error("alias expansion too large");
361 	    }
362 	    if (lexclass[*q] == ALPHA) {
363 		r = namebuf;
364 		do {
365 		    *r++ = *q++;
366 		} while (isalnum(*q));
367 		*r = '\0';
368 		i = 0;
369 		match = false;
370 		foreach(Name, n, pl)
371 		    if (streq(ident(n), namebuf)) {
372 			match = true;
373 			break;
374 		    }
375 		    ++i;
376 		endfor
377 		if (match) {
378 		    r = actual[i];
379 		} else {
380 		    r = namebuf;
381 		}
382 		while (*r != '\0') {
383 		    *p++ = *r++;
384 		}
385 	    } else {
386 		*p++ = *q++;
387 	    }
388 	}
389 	*p = '\0';
390 	insertinput(buf);
391     }
392 }
393 
394 /*
395  * Parser error handling.
396  */
397 
398 public yyerror(s)
399 String s;
400 {
401     register char *p;
402     register integer start;
403 
404     if (streq(s, "syntax error")) {
405 	beginerrmsg();
406 	p = prevchar;
407 	start = p - &scanner_linebuf[0];
408 	if (p > &scanner_linebuf[0]) {
409 	    while (lexclass[*p] == WHITE and p > &scanner_linebuf[0]) {
410 		--p;
411 	    }
412 	}
413 	fprintf(stderr, "%s", scanner_linebuf);
414 	if (start != 0) {
415 	    fprintf(stderr, "%*c", start, ' ');
416 	}
417 	if (p == &scanner_linebuf[0]) {
418 	    fprintf(stderr, "^ unrecognized command");
419 	} else {
420 	    fprintf(stderr, "^ syntax error");
421 	}
422 	enderrmsg();
423     } else {
424 	error(s);
425     }
426 }
427 
428 /*
429  * Eat the current line.
430  */
431 
432 public gobble ()
433 {
434     curchar = scanner_linebuf;
435     scanner_linebuf[0] = '\0';
436 }
437 
438 /*
439  * Scan an identifier.
440  *
441  * If chkalias is true, check first to see if it's an alias.
442  * Otherwise, check to see if it's a keyword.
443  */
444 
445 private Token getident (chkalias)
446 boolean chkalias;
447 {
448     char buf[1024];
449     register char *p, *q;
450     register Token t;
451     List pl;
452     String str;
453 
454     p = curchar;
455     q = buf;
456     if (shellmode) {
457 	do {
458 	    *q++ = *p++;
459 	} while (index(" \t\n!&<>*[]()'\"", *p) == nil);
460     } else {
461 	do {
462 	    *q++ = *p++;
463 	} while (isalnum(*p));
464     }
465     curchar = p;
466     *q = '\0';
467     yylval.y_name = identname(buf, false);
468     if (chkalias) {
469 	if (findalias(yylval.y_name, &pl, &str)) {
470 	    expand(pl, str);
471 	    while (lexclass[*curchar] == WHITE) {
472 		++curchar;
473 	    }
474 	    if (pl == nil) {
475 		t = getident(false);
476 	    } else {
477 		t = getident(true);
478 	    }
479 	} else if (shellmode) {
480 	    t = NAME;
481 	} else {
482 	    t = findkeyword(yylval.y_name, NAME);
483 	}
484     } else if (shellmode) {
485 	t = NAME;
486     } else {
487 	t = findkeyword(yylval.y_name, NAME);
488     }
489     return t;
490 }
491 
492 /*
493  * Scan a number.
494  */
495 
496 private Token getnum()
497 {
498     char buf[1024];
499     register Char *p, *q;
500     register Token t;
501     Integer base;
502 
503     p = curchar;
504     q = buf;
505     if (*p == '0') {
506 	if (*(p+1) == 'x') {
507 	    p += 2;
508 	    base = 16;
509 	} else if (*(p+1) == 't') {
510 	    base = 10;
511 	} else if (varIsSet("$hexin")) {
512 	    base = 16;
513 	} else {
514 	    base = 8;
515 	}
516     } else if (varIsSet("$hexin")) {
517 	base = 16;
518     } else if (varIsSet("$octin")) {
519 	base = 8;
520     } else {
521 	base = 10;
522     }
523     if (base == 16) {
524 	do {
525 	    *q++ = *p++;
526 	} while (ishexdigit(*p));
527     } else {
528 	do {
529 	    *q++ = *p++;
530 	} while (isdigit(*p));
531     }
532     if (*p == '.') {
533 	do {
534 	    *q++ = *p++;
535 	} while (isdigit(*p));
536 	if (*p == 'e' or *p == 'E') {
537 	    p++;
538 	    if (*p == '+' or *p == '-' or isdigit(*p)) {
539 		*q++ = 'e';
540 		do {
541 		    *q++ = *p++;
542 		} while (isdigit(*p));
543 	    }
544 	}
545 	*q = '\0';
546 	yylval.y_real = atof(buf);
547 	t = REAL;
548     } else {
549 	*q = '\0';
550 	switch (base) {
551 	    case 10:
552 		yylval.y_int = atol(buf);
553 		break;
554 
555 	    case 8:
556 		yylval.y_int = octal(buf);
557 		break;
558 
559 	    case 16:
560 		yylval.y_int = hex(buf);
561 		break;
562 
563 	    default:
564 		badcaseval(base);
565 	}
566 	t = INT;
567     }
568     curchar = p;
569     return t;
570 }
571 
572 /*
573  * Convert a string of octal digits to an integer.
574  */
575 
576 private int octal(s)
577 String s;
578 {
579     register Char *p;
580     register Integer n;
581 
582     n = 0;
583     for (p = s; *p != '\0'; p++) {
584 	n = 8*n + (*p - '0');
585     }
586     return n;
587 }
588 
589 /*
590  * Convert a string of hexadecimal digits to an integer.
591  */
592 
593 private int hex(s)
594 String s;
595 {
596     register Char *p;
597     register Integer n;
598 
599     n = 0;
600     for (p = s; *p != '\0'; p++) {
601 	n *= 16;
602 	if (*p >= 'a' and *p <= 'f') {
603 	    n += (*p - 'a' + 10);
604 	} else if (*p >= 'A' and *p <= 'F') {
605 	    n += (*p - 'A' + 10);
606 	} else {
607 	    n += (*p - '0');
608 	}
609     }
610     return n;
611 }
612 
613 /*
614  * Scan a string.
615  */
616 
617 private Token getstring (quote)
618 char quote;
619 {
620     register char *p, *q;
621     char buf[MAXLINESIZE];
622     boolean endofstring;
623     Token t;
624 
625     p = curchar;
626     q = buf;
627     endofstring = false;
628     while (not endofstring) {
629 	if (*p == '\\' and *(p+1) == '\n') {
630 	    if (fgets(scanner_linebuf, MAXLINESIZE, in) == nil) {
631 		error("non-terminated string");
632 	    }
633 	    p = &scanner_linebuf[0] - 1;
634 	} else if (*p == '\n' or *p == '\0') {
635 	    error("non-terminated string");
636 	    endofstring = true;
637 	} else if (*p == quote) {
638 	    endofstring = true;
639 	} else {
640 	    curchar = p;
641 	    *q++ = charcon(p);
642 	    p = curchar;
643 	}
644 	p++;
645     }
646     curchar = p;
647     *q = '\0';
648     if (quote == '\'' and buf[1] == '\0') {
649 	yylval.y_char = buf[0];
650 	t = CHAR;
651     } else {
652 	yylval.y_string = strdup(buf);
653 	t = STRING;
654     }
655     return t;
656 }
657 
658 /*
659  * Process a character constant.
660  * Watch out for backslashes.
661  */
662 
663 private char charcon (s)
664 String s;
665 {
666     register char *p, *q;
667     char c, buf[10];
668 
669     p = s;
670     if (*p == '\\') {
671 	++p;
672 	switch (*p) {
673 	    case '\\':
674 		c = '\\';
675 		break;
676 
677 	    case 'n':
678 		c = '\n';
679 		break;
680 
681 	    case 'r':
682 		c = '\r';
683 		break;
684 
685 	    case 't':
686 		c = '\t';
687 		break;
688 
689 	    case '\'':
690 	    case '"':
691 		c = *p;
692 		break;
693 
694 	    default:
695 		if (isdigit(*p)) {
696 		    q = buf;
697 		    do {
698 			*q++ = *p++;
699 		    } while (isdigit(*p));
700 		    *q = '\0';
701 		    c = (char) octal(buf);
702 		}
703 		--p;
704 		break;
705 	}
706 	curchar = p;
707     } else {
708 	c = *p;
709     }
710     return c;
711 }
712 
713 /*
714  * Input file management routines.
715  */
716 
717 public setinput(filename)
718 Filename filename;
719 {
720     File f;
721 
722     f = fopen(filename, "r");
723     if (f == nil) {
724 	error("can't open %s", filename);
725     } else {
726 	if (curinclindex >= MAXINCLDEPTH) {
727 	    error("unreasonable input nesting on \"%s\"", filename);
728 	}
729 	inclinfo[curinclindex].savefile = in;
730 	inclinfo[curinclindex].savefn = errfilename;
731 	inclinfo[curinclindex].savelineno = errlineno;
732 	curinclindex++;
733 	in = f;
734 	errfilename = filename;
735 	errlineno = 1;
736     }
737 }
738 
739 private Boolean eofinput()
740 {
741     register Boolean b;
742 
743     if (curinclindex == 0) {
744 	if (isterm(in)) {
745 	    putchar('\n');
746 	    clearerr(in);
747 	    b = false;
748 	} else {
749 	    b = true;
750 	}
751     } else {
752 	fclose(in);
753 	--curinclindex;
754 	in = inclinfo[curinclindex].savefile;
755 	errfilename = inclinfo[curinclindex].savefn;
756 	errlineno = inclinfo[curinclindex].savelineno;
757 	b = false;
758     }
759     return b;
760 }
761 
762 /*
763  * Pop the current input.  Return whether successful.
764  */
765 
766 public Boolean popinput()
767 {
768     Boolean b;
769 
770     if (curinclindex == 0) {
771 	b = false;
772     } else {
773 	b = (Boolean) (not eofinput());
774     }
775     return b;
776 }
777 
778 /*
779  * Return whether we are currently reading from standard input.
780  */
781 
782 public Boolean isstdin()
783 {
784     return (Boolean) (in == stdin);
785 }
786 
787 /*
788  * Send the current line to the shell.
789  */
790 
791 public shellline()
792 {
793     register char *p;
794 
795     p = curchar;
796     while (*p != '\0' and (*p == '\n' or lexclass[*p] == WHITE)) {
797 	++p;
798     }
799     shell(p);
800     if (*p == '\0' and isterm(in)) {
801 	putchar('\n');
802     }
803     erecover();
804 }
805 
806 /*
807  * Read the rest of the current line in "shell mode".
808  */
809 
810 public beginshellmode()
811 {
812     shellmode = true;
813 }
814 
815 /*
816  * Print out a token for debugging.
817  */
818 
819 public print_token(f, t)
820 File f;
821 Token t;
822 {
823     if (t == '\n') {
824 	fprintf(f, "char '\\n'");
825     } else if (t == EOF) {
826 	fprintf(f, "EOF");
827     } else if (t < 256) {
828 	fprintf(f, "char '%c'", t);
829     } else {
830 	fprintf(f, "\"%s\"", keywdstring(t));
831     }
832 }
833