xref: /netbsd-src/sys/ddb/db_lex.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: db_lex.c,v 1.21 2009/03/07 22:02:17 ad Exp $	*/
2 
3 /*
4  * Mach Operating System
5  * Copyright (c) 1991,1990 Carnegie Mellon University
6  * All Rights Reserved.
7  *
8  * Permission to use, copy, modify and distribute this software and its
9  * documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  *
28  *	Author: David B. Golub, Carnegie Mellon University
29  *	Date:	7/90
30  */
31 
32 /*
33  * Lexical analyzer.
34  */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: db_lex.c,v 1.21 2009/03/07 22:02:17 ad Exp $");
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 
42 #include <ddb/ddb.h>
43 
44 db_expr_t	db_tok_number;
45 char		db_tok_string[TOK_STRING_SIZE];
46 
47 static char	db_line[DB_LINE_MAXLEN];
48 static const char *db_lp;
49 static const char *db_endlp;
50 
51 static int	db_look_char = 0;
52 static int	db_look_token = 0;
53 
54 static void	db_flush_line(void);
55 static int	db_read_char(void);
56 static void	db_unread_char(int);
57 static int	db_lex(void);
58 
59 int
60 db_read_line(void)
61 {
62 	int	i;
63 
64 	i = db_readline(db_line, sizeof(db_line));
65 	if (i == 0)
66 		return (0);	/* EOI */
67 	db_set_line(db_line, db_line + i);
68 	return (i);
69 }
70 
71 void
72 db_set_line(const char *sp, const char *ep)
73 {
74 
75 	db_lp = sp;
76 	db_endlp = ep;
77 }
78 
79 static void
80 db_flush_line(void)
81 {
82 
83 	db_lp = db_line;
84 	db_endlp = db_line;
85 }
86 
87 static int
88 db_read_char(void)
89 {
90 	int	c;
91 
92 	if (db_look_char != 0) {
93 		c = db_look_char;
94 		db_look_char = 0;
95 	}
96 	else if (db_lp >= db_endlp)
97 		c = -1;
98 	else
99 		c = *db_lp++;
100 	return (c);
101 }
102 
103 static void
104 db_unread_char(int c)
105 {
106 
107 	db_look_char = c;
108 }
109 
110 void
111 db_unread_token(int t)
112 {
113 
114 	db_look_token = t;
115 }
116 
117 int
118 db_read_token(void)
119 {
120 	int	t;
121 
122 	if (db_look_token) {
123 		t = db_look_token;
124 		db_look_token = 0;
125 	}
126 	else
127 		t = db_lex();
128 	return (t);
129 }
130 
131 int	db_radix = 16;
132 
133 /*
134  * Convert the number to a string in the current radix.
135  * This replaces the non-standard %n printf() format.
136  */
137 
138 char *
139 db_num_to_str(db_expr_t val)
140 {
141 
142 	/*
143 	 * 2 chars for "0x", 1 for a sign ("-")
144 	 * up to 21 chars for a 64-bit number:
145 	 *   % echo 2^64 | bc | wc -c
146 	 *   21
147 	 * and 1 char for a terminal NUL
148 	 * 2+1+21+1 => 25
149 	 */
150 	static char buf[25];
151 
152 	if (db_radix == 16)
153 		snprintf(buf, sizeof(buf), DB_EXPR_T_IS_QUAD ? "%#qx" : "%#lx",
154 		    val);
155 	else if (db_radix == 8)
156 		snprintf(buf, sizeof(buf), DB_EXPR_T_IS_QUAD ? "%#qo" : "%#lo",
157 		    val);
158 	else
159 		snprintf(buf, sizeof(buf), DB_EXPR_T_IS_QUAD ? "%qu" : "%lu",
160 		    val);
161 
162 	return (buf);
163 }
164 
165 void
166 db_flush_lex(void)
167 {
168 
169 	db_flush_line();
170 	db_look_char = 0;
171 	db_look_token = 0;
172 }
173 
174 static int
175 db_lex(void)
176 {
177 	int	c;
178 
179 	c = db_read_char();
180 	while (c <= ' ' || c > '~') {
181 		if (c == '\n' || c == -1)
182 			return (tEOL);
183 		c = db_read_char();
184 	}
185 
186 	if (c >= '0' && c <= '9') {
187 		/* number */
188 		db_expr_t	r, digit = 0;
189 
190 		if (c > '0')
191 			r = db_radix;
192 		else {
193 			c = db_read_char();
194 			if (c == 'O' || c == 'o')
195 				r = 8;
196 			else if (c == 'T' || c == 't')
197 				r = 10;
198 			else if (c == 'X' || c == 'x')
199 				r = 16;
200 			else {
201 				r = db_radix;
202 				db_unread_char(c);
203 			}
204 			c = db_read_char();
205 		}
206 		db_tok_number = 0;
207 		for (;;) {
208 			if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
209 				digit = c - '0';
210 			else if (r == 16 && ((c >= 'A' && c <= 'F') ||
211 				(c >= 'a' && c <= 'f'))) {
212 				if (c >= 'a')
213 					digit = c - 'a' + 10;
214 				else if (c >= 'A')
215 					digit = c - 'A' + 10;
216 			}
217 			else
218 				break;
219 			db_tok_number = db_tok_number * r + digit;
220 			c = db_read_char();
221 		}
222 		if ((c >= '0' && c <= '9') ||
223 		    (c >= 'A' && c <= 'Z') ||
224 		    (c >= 'a' && c <= 'z') ||
225 		    (c == '_')) {
226 			db_error("Bad character in number\n");
227 			/*NOTREACHED*/
228 		}
229 		db_unread_char(c);
230 		return (tNUMBER);
231 	}
232 	if ((c >= 'A' && c <= 'Z') ||
233 	    (c >= 'a' && c <= 'z') ||
234 	    c == '_' || c == '\\') {
235 		/* string */
236 		char *cp;
237 
238 		cp = db_tok_string;
239 		if (c == '\\') {
240 			c = db_read_char();
241 			if (c == '\n' || c == -1) {
242 				db_error("Bad escape\n");
243 				/*NOTREACHED*/
244 			}
245 		}
246 		*cp++ = c;
247 		while (1) {
248 			c = db_read_char();
249 			if ((c >= 'A' && c <= 'Z') ||
250 			    (c >= 'a' && c <= 'z') ||
251 			    (c >= '0' && c <= '9') ||
252 			    c == '_' || c == '\\' || c == ':') {
253 				if (c == '\\') {
254 					c = db_read_char();
255 					if (c == '\n' || c == -1) {
256 						db_error("Bad escape\n");
257 						/*NOTREACHED*/
258 					}
259 				}
260 				*cp++ = c;
261 				if (cp == db_tok_string+sizeof(db_tok_string)) {
262 					db_error("String too long\n");
263 					/*NOTREACHED*/
264 				}
265 				continue;
266 			} else {
267 				*cp = '\0';
268 				break;
269 			}
270 		}
271 		db_unread_char(c);
272 		return (tIDENT);
273 	}
274 
275 	switch (c) {
276 	case '+':
277 		return (tPLUS);
278 	case '-':
279 		return (tMINUS);
280 	case '.':
281 		c = db_read_char();
282 		if (c == '.')
283 			return (tDOTDOT);
284 		db_unread_char(c);
285 		return (tDOT);
286 	case '*':
287 		return (tSTAR);
288 	case '/':
289 		return (tSLASH);
290 	case '=':
291 		return (tEQ);
292 	case '%':
293 		return (tPCT);
294 	case '#':
295 		return (tHASH);
296 	case '(':
297 		return (tLPAREN);
298 	case ')':
299 		return (tRPAREN);
300 	case ',':
301 		return (tCOMMA);
302 	case '"':
303 		return (tDITTO);
304 	case '$':
305 		return (tDOLLAR);
306 	case '!':
307 		return (tEXCL);
308 	case '<':
309 		c = db_read_char();
310 		if (c == '<')
311 			return (tSHIFT_L);
312 		db_unread_char(c);
313 		break;
314 	case '>':
315 		c = db_read_char();
316 		if (c == '>')
317 			return (tSHIFT_R);
318 		db_unread_char(c);
319 		break;
320 	case -1:
321 		return (tEOF);
322 	}
323 	db_printf("Bad character\n");
324 	db_flush_lex();
325 	return (tEOF);
326 }
327 
328 /*
329  * Utility routine - discard tokens through end-of-line.
330  */
331 void
332 db_skip_to_eol(void)
333 {
334 	int t;
335 
336 	do {
337 		t = db_read_token();
338 	} while (t != tEOL);
339 }
340 
341 void
342 db_error(const char *s)
343 {
344 
345 	if (s)
346 		db_printf("%s", s);
347 	db_flush_lex();
348 	longjmp(db_recover);
349 }
350