xref: /netbsd-src/sys/ddb/db_lex.c (revision cda4f8f6ee55684e8d311b86c99ea59191e6b74f)
1 /*
2  * Mach Operating System
3  * Copyright (c) 1991,1990 Carnegie Mellon University
4  * All Rights Reserved.
5  *
6  * Permission to use, copy, modify and distribute this software and its
7  * documentation is hereby granted, provided that both the copyright
8  * notice and this permission notice appear in all copies of the
9  * software, derivative works or modified versions, and any portions
10  * thereof, and that both notices appear in supporting documentation.
11  *
12  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
13  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15  *
16  * Carnegie Mellon requests users of this software to return to
17  *
18  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
19  *  School of Computer Science
20  *  Carnegie Mellon University
21  *  Pittsburgh PA 15213-3890
22  *
23  * any improvements or extensions that they make and grant Carnegie the
24  * rights to redistribute these changes.
25  */
26 /*
27  * $Id: db_lex.c,v 1.2 1993/05/20 03:39:16 cgd Exp $
28  *
29  * HISTORY
30  * $Log: db_lex.c,v $
31  * Revision 1.2  1993/05/20 03:39:16  cgd
32  * add explicit rcs id
33  *
34  * Revision 1.1.1.1  1993/03/21  09:46:26  cgd
35  * initial import of 386bsd-0.1 sources
36  *
37  * Revision 1.1  1992/03/25  21:45:13  pace
38  * Initial revision
39  *
40  * Revision 2.3  91/02/05  17:06:36  mrt
41  * 	Changed to new Mach copyright
42  * 	[91/01/31  16:18:20  mrt]
43  *
44  * Revision 2.2  90/08/27  21:51:10  dbg
45  * 	Add 'dotdot' token.
46  * 	[90/08/22            dbg]
47  *
48  * 	Allow backslash to quote any character into an identifier.
49  * 	Allow colon in identifier for symbol table qualification.
50  * 	[90/08/16            dbg]
51  * 	Reduce lint.
52  * 	[90/08/07            dbg]
53  * 	Created.
54  * 	[90/07/25            dbg]
55  *
56  */
57 /*
58  *	Author: David B. Golub, Carnegie Mellon University
59  *	Date:	7/90
60  */
61 /*
62  * Lexical analyzer.
63  */
64 #include <ddb/db_lex.h>
65 
/*
 * Current input line and the scan position within it.
 *
 * db_line   holds the text of the line most recently read by
 *           db_read_line().
 * db_lp     points at the next character db_read_char() will return.
 * db_endlp  points one past the last valid character of the line;
 *           the line is exhausted once db_lp reaches it.
 */
char	db_line[120];
char	*db_lp;
char	*db_endlp;
68 
69 int
70 db_read_line()
71 {
72 	int	i;
73 
74 	i = db_readline(db_line, sizeof(db_line));
75 	if (i == 0)
76 	    return (0);	/* EOI */
77 	db_lp = db_line;
78 	db_endlp = db_lp + i;
79 	return (i);
80 }
81 
82 void
83 db_flush_line()
84 {
85 	db_lp = db_line;
86 	db_endlp = db_line;
87 }
88 
89 int	db_look_char = 0;
90 
91 int
92 db_read_char()
93 {
94 	int	c;
95 
96 	if (db_look_char != 0) {
97 	    c = db_look_char;
98 	    db_look_char = 0;
99 	}
100 	else if (db_lp >= db_endlp)
101 	    c = -1;
102 	else
103 	    c = *db_lp++;
104 	return (c);
105 }
106 
107 void
108 db_unread_char(c)
109 {
110 	db_look_char = c;
111 }
112 
/*
 * Single-token push-back slot.  Zero means no token is pending.
 */
int	db_look_token = 0;

/*
 * Push token t back so that the next db_read_token() returns it
 * instead of scanning new input.  Only one token is remembered; a
 * later call replaces an earlier one.
 */
void
db_unread_token(t)
	int	t;
{
	db_look_token = t;
}
121 
122 int
123 db_read_token()
124 {
125 	int	t;
126 
127 	if (db_look_token) {
128 	    t = db_look_token;
129 	    db_look_token = 0;
130 	}
131 	else
132 	    t = db_lex();
133 	return (t);
134 }
135 
136 int	db_tok_number;
137 char	db_tok_string[TOK_STRING_SIZE];
138 
139 int	db_radix = 16;
140 
141 void
142 db_flush_lex()
143 {
144 	db_flush_line();
145 	db_look_char = 0;
146 	db_look_token = 0;
147 }
148 
149 int
150 db_lex()
151 {
152 	int	c;
153 
154 	c = db_read_char();
155 	while (c <= ' ' || c > '~') {
156 	    if (c == '\n' || c == -1)
157 		return (tEOL);
158 	    c = db_read_char();
159 	}
160 
161 	if (c >= '0' && c <= '9') {
162 	    /* number */
163 	    int	r, digit;
164 
165 	    if (c > '0')
166 		r = db_radix;
167 	    else {
168 		c = db_read_char();
169 		if (c == 'O' || c == 'o')
170 		    r = 8;
171 		else if (c == 'T' || c == 't')
172 		    r = 10;
173 		else if (c == 'X' || c == 'x')
174 		    r = 16;
175 		else {
176 		    r = db_radix;
177 		    db_unread_char(c);
178 		}
179 		c = db_read_char();
180 	    }
181 	    db_tok_number = 0;
182 	    for (;;) {
183 		if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
184 		    digit = c - '0';
185 		else if (r == 16 && ((c >= 'A' && c <= 'F') ||
186 				     (c >= 'a' && c <= 'f'))) {
187 		    if (c >= 'a')
188 			digit = c - 'a' + 10;
189 		    else if (c >= 'A')
190 			digit = c - 'A' + 10;
191 		}
192 		else
193 		    break;
194 		db_tok_number = db_tok_number * r + digit;
195 		c = db_read_char();
196 	    }
197 	    if ((c >= '0' && c <= '9') ||
198 		(c >= 'A' && c <= 'Z') ||
199 		(c >= 'a' && c <= 'z') ||
200 		(c == '_'))
201 	    {
202 		db_error("Bad character in number\n");
203 		db_flush_lex();
204 		return (tEOF);
205 	    }
206 	    db_unread_char(c);
207 	    return (tNUMBER);
208 	}
209 	if ((c >= 'A' && c <= 'Z') ||
210 	    (c >= 'a' && c <= 'z') ||
211 	    c == '_' || c == '\\')
212 	{
213 	    /* string */
214 	    char *cp;
215 
216 	    cp = db_tok_string;
217 	    if (c == '\\') {
218 		c = db_read_char();
219 		if (c == '\n' || c == -1)
220 		    db_error("Bad escape\n");
221 	    }
222 	    *cp++ = c;
223 	    while (1) {
224 		c = db_read_char();
225 		if ((c >= 'A' && c <= 'Z') ||
226 		    (c >= 'a' && c <= 'z') ||
227 		    (c >= '0' && c <= '9') ||
228 		    c == '_' || c == '\\' || c == ':')
229 		{
230 		    if (c == '\\') {
231 			c = db_read_char();
232 			if (c == '\n' || c == -1)
233 			    db_error("Bad escape\n");
234 		    }
235 		    *cp++ = c;
236 		    if (cp == db_tok_string+sizeof(db_tok_string)) {
237 			db_error("String too long\n");
238 			db_flush_lex();
239 			return (tEOF);
240 		    }
241 		    continue;
242 		}
243 		else {
244 		    *cp = '\0';
245 		    break;
246 		}
247 	    }
248 	    db_unread_char(c);
249 	    return (tIDENT);
250 	}
251 
252 	switch (c) {
253 	    case '+':
254 		return (tPLUS);
255 	    case '-':
256 		return (tMINUS);
257 	    case '.':
258 		c = db_read_char();
259 		if (c == '.')
260 		    return (tDOTDOT);
261 		db_unread_char(c);
262 		return (tDOT);
263 	    case '*':
264 		return (tSTAR);
265 	    case '/':
266 		return (tSLASH);
267 	    case '=':
268 		return (tEQ);
269 	    case '%':
270 		return (tPCT);
271 	    case '#':
272 		return (tHASH);
273 	    case '(':
274 		return (tLPAREN);
275 	    case ')':
276 		return (tRPAREN);
277 	    case ',':
278 		return (tCOMMA);
279 	    case '"':
280 		return (tDITTO);
281 	    case '$':
282 		return (tDOLLAR);
283 	    case '!':
284 		return (tEXCL);
285 	    case '<':
286 		c = db_read_char();
287 		if (c == '<')
288 		    return (tSHIFT_L);
289 		db_unread_char(c);
290 		break;
291 	    case '>':
292 		c = db_read_char();
293 		if (c == '>')
294 		    return (tSHIFT_R);
295 		db_unread_char(c);
296 		break;
297 	    case -1:
298 		return (tEOF);
299 	}
300 	db_printf("Bad character\n");
301 	db_flush_lex();
302 	return (tEOF);
303 }
304