xref: /netbsd-src/sys/ddb/db_lex.c (revision d710132b4b8ce7f7cccaaf660cb16aa16b4077a0)
1 /*	$NetBSD: db_lex.c,v 1.18 2003/05/17 09:58:03 scw Exp $	*/
2 
3 /*
4  * Mach Operating System
5  * Copyright (c) 1991,1990 Carnegie Mellon University
6  * All Rights Reserved.
7  *
8  * Permission to use, copy, modify and distribute this software and its
9  * documentation is hereby granted, provided that both the copyright
10  * notice and this permission notice appear in all copies of the
11  * software, derivative works or modified versions, and any portions
12  * thereof, and that both notices appear in supporting documentation.
13  *
14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
16  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17  *
18  * Carnegie Mellon requests users of this software to return to
19  *
20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
21  *  School of Computer Science
22  *  Carnegie Mellon University
23  *  Pittsburgh PA 15213-3890
24  *
25  * any improvements or extensions that they make and grant Carnegie the
26  * rights to redistribute these changes.
27  *
28  *	Author: David B. Golub, Carnegie Mellon University
29  *	Date:	7/90
30  */
31 
32 /*
33  * Lexical analyzer.
34  */
35 
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: db_lex.c,v 1.18 2003/05/17 09:58:03 scw Exp $");
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 
42 #include <machine/db_machdep.h>
43 
44 #include <ddb/db_lex.h>
45 #include <ddb/db_output.h>
46 #include <ddb/db_command.h>
47 #include <ddb/db_sym.h>
48 #include <ddb/db_extern.h>
49 #include <ddb/db_interface.h>
50 
51 db_expr_t	db_tok_number;
52 char		db_tok_string[TOK_STRING_SIZE];
53 
54 static char	db_line[120];
55 static char    *db_lp, *db_endlp;
56 
57 static int	db_look_char = 0;
58 static int	db_look_token = 0;
59 
60 static void	db_flush_line(void);
61 static int	db_read_char(void);
62 static void	db_unread_char(int);
63 static int	db_lex(void);
64 
65 int
66 db_read_line(void)
67 {
68 	int	i;
69 
70 	i = db_readline(db_line, sizeof(db_line));
71 	if (i == 0)
72 		return (0);	/* EOI */
73 	db_lp = db_line;
74 	db_endlp = db_lp + i;
75 	return (i);
76 }
77 
78 static void
79 db_flush_line(void)
80 {
81 
82 	db_lp = db_line;
83 	db_endlp = db_line;
84 }
85 
86 static int
87 db_read_char(void)
88 {
89 	int	c;
90 
91 	if (db_look_char != 0) {
92 		c = db_look_char;
93 		db_look_char = 0;
94 	}
95 	else if (db_lp >= db_endlp)
96 		c = -1;
97 	else
98 		c = *db_lp++;
99 	return (c);
100 }
101 
102 static void
103 db_unread_char(int c)
104 {
105 
106 	db_look_char = c;
107 }
108 
109 void
110 db_unread_token(int t)
111 {
112 
113 	db_look_token = t;
114 }
115 
116 int
117 db_read_token(void)
118 {
119 	int	t;
120 
121 	if (db_look_token) {
122 		t = db_look_token;
123 		db_look_token = 0;
124 	}
125 	else
126 		t = db_lex();
127 	return (t);
128 }
129 
130 int	db_radix = 16;
131 
132 /*
133  * Convert the number to a string in the current radix.
134  * This replaces the non-standard %n printf() format.
135  */
136 
137 char *
138 db_num_to_str(db_expr_t val)
139 {
140 
141 	/*
142 	 * 2 chars for "0x", 1 for a sign ("-")
143 	 * up to 21 chars for a 64-bit number:
144 	 *   % echo 2^64 | bc | wc -c
145 	 *   21
146 	 * and 1 char for a terminal NUL
147 	 * 2+1+21+1 => 25
148 	 */
149 	static char buf[25];
150 
151 	if (db_radix == 16)
152 		snprintf(buf, sizeof(buf), DB_EXPR_T_IS_QUAD ? "%#qx" : "%#lx",
153 		    val);
154 	else if (db_radix == 8)
155 		snprintf(buf, sizeof(buf), DB_EXPR_T_IS_QUAD ? "%#qo" : "%#lo",
156 		    val);
157 	else
158 		snprintf(buf, sizeof(buf), DB_EXPR_T_IS_QUAD ? "%qu" : "%lu",
159 		    val);
160 
161 	return (buf);
162 }
163 
164 void
165 db_flush_lex(void)
166 {
167 
168 	db_flush_line();
169 	db_look_char = 0;
170 	db_look_token = 0;
171 }
172 
173 static int
174 db_lex(void)
175 {
176 	int	c;
177 
178 	c = db_read_char();
179 	while (c <= ' ' || c > '~') {
180 		if (c == '\n' || c == -1)
181 			return (tEOL);
182 		c = db_read_char();
183 	}
184 
185 	if (c >= '0' && c <= '9') {
186 		/* number */
187 		db_expr_t	r, digit = 0;
188 
189 		if (c > '0')
190 			r = db_radix;
191 		else {
192 			c = db_read_char();
193 			if (c == 'O' || c == 'o')
194 				r = 8;
195 			else if (c == 'T' || c == 't')
196 				r = 10;
197 			else if (c == 'X' || c == 'x')
198 				r = 16;
199 			else {
200 				r = db_radix;
201 				db_unread_char(c);
202 			}
203 			c = db_read_char();
204 		}
205 		db_tok_number = 0;
206 		for (;;) {
207 			if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
208 				digit = c - '0';
209 			else if (r == 16 && ((c >= 'A' && c <= 'F') ||
210 				(c >= 'a' && c <= 'f'))) {
211 				if (c >= 'a')
212 					digit = c - 'a' + 10;
213 				else if (c >= 'A')
214 					digit = c - 'A' + 10;
215 			}
216 			else
217 				break;
218 			db_tok_number = db_tok_number * r + digit;
219 			c = db_read_char();
220 		}
221 		if ((c >= '0' && c <= '9') ||
222 		    (c >= 'A' && c <= 'Z') ||
223 		    (c >= 'a' && c <= 'z') ||
224 		    (c == '_')) {
225 			db_error("Bad character in number\n");
226 			/*NOTREACHED*/
227 		}
228 		db_unread_char(c);
229 		return (tNUMBER);
230 	}
231 	if ((c >= 'A' && c <= 'Z') ||
232 	    (c >= 'a' && c <= 'z') ||
233 	    c == '_' || c == '\\') {
234 		/* string */
235 		char *cp;
236 
237 		cp = db_tok_string;
238 		if (c == '\\') {
239 			c = db_read_char();
240 			if (c == '\n' || c == -1) {
241 				db_error("Bad escape\n");
242 				/*NOTREACHED*/
243 			}
244 		}
245 		*cp++ = c;
246 		while (1) {
247 			c = db_read_char();
248 			if ((c >= 'A' && c <= 'Z') ||
249 			    (c >= 'a' && c <= 'z') ||
250 			    (c >= '0' && c <= '9') ||
251 			    c == '_' || c == '\\' || c == ':') {
252 				if (c == '\\') {
253 					c = db_read_char();
254 					if (c == '\n' || c == -1) {
255 						db_error("Bad escape\n");
256 						/*NOTREACHED*/
257 					}
258 				}
259 				*cp++ = c;
260 				if (cp == db_tok_string+sizeof(db_tok_string)) {
261 					db_error("String too long\n");
262 					/*NOTREACHED*/
263 				}
264 				continue;
265 			} else {
266 				*cp = '\0';
267 				break;
268 			}
269 		}
270 		db_unread_char(c);
271 		return (tIDENT);
272 	}
273 
274 	switch (c) {
275 	case '+':
276 		return (tPLUS);
277 	case '-':
278 		return (tMINUS);
279 	case '.':
280 		c = db_read_char();
281 		if (c == '.')
282 			return (tDOTDOT);
283 		db_unread_char(c);
284 		return (tDOT);
285 	case '*':
286 		return (tSTAR);
287 	case '/':
288 		return (tSLASH);
289 	case '=':
290 		return (tEQ);
291 	case '%':
292 		return (tPCT);
293 	case '#':
294 		return (tHASH);
295 	case '(':
296 		return (tLPAREN);
297 	case ')':
298 		return (tRPAREN);
299 	case ',':
300 		return (tCOMMA);
301 	case '"':
302 		return (tDITTO);
303 	case '$':
304 		return (tDOLLAR);
305 	case '!':
306 		return (tEXCL);
307 	case '<':
308 		c = db_read_char();
309 		if (c == '<')
310 			return (tSHIFT_L);
311 		db_unread_char(c);
312 		break;
313 	case '>':
314 		c = db_read_char();
315 		if (c == '>')
316 			return (tSHIFT_R);
317 		db_unread_char(c);
318 		break;
319 	case -1:
320 		return (tEOF);
321 	}
322 	db_printf("Bad character\n");
323 	db_flush_lex();
324 	return (tEOF);
325 }
326