1 /* 2 * Mach Operating System 3 * Copyright (c) 1991,1990 Carnegie Mellon University 4 * All Rights Reserved. 5 * 6 * Permission to use, copy, modify and distribute this software and its 7 * documentation is hereby granted, provided that both the copyright 8 * notice and this permission notice appear in all copies of the 9 * software, derivative works or modified versions, and any portions 10 * thereof, and that both notices appear in supporting documentation. 11 * 12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS 13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 15 * 16 * Carnegie Mellon requests users of this software to return to 17 * 18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 19 * School of Computer Science 20 * Carnegie Mellon University 21 * Pittsburgh PA 15213-3890 22 * 23 * any improvements or extensions that they make and grant Carnegie the 24 * rights to redistribute these changes. 25 */ 26 /* 27 * $Id: db_lex.c,v 1.2 1993/05/20 03:39:16 cgd Exp $ 28 * 29 * HISTORY 30 * $Log: db_lex.c,v $ 31 * Revision 1.2 1993/05/20 03:39:16 cgd 32 * add explicit rcs id 33 * 34 * Revision 1.1.1.1 1993/03/21 09:46:26 cgd 35 * initial import of 386bsd-0.1 sources 36 * 37 * Revision 1.1 1992/03/25 21:45:13 pace 38 * Initial revision 39 * 40 * Revision 2.3 91/02/05 17:06:36 mrt 41 * Changed to new Mach copyright 42 * [91/01/31 16:18:20 mrt] 43 * 44 * Revision 2.2 90/08/27 21:51:10 dbg 45 * Add 'dotdot' token. 46 * [90/08/22 dbg] 47 * 48 * Allow backslash to quote any character into an identifier. 49 * Allow colon in identifier for symbol table qualification. 50 * [90/08/16 dbg] 51 * Reduce lint. 52 * [90/08/07 dbg] 53 * Created. 54 * [90/07/25 dbg] 55 * 56 */ 57 /* 58 * Author: David B. Golub, Carnegie Mellon University 59 * Date: 7/90 60 */ 61 /* 62 * Lexical analyzer. 63 */ 64 #include <ddb/db_lex.h> 65 66 char db_line[120]; 67 char * db_lp, *db_endlp; 68 69 int 70 db_read_line() 71 { 72 int i; 73 74 i = db_readline(db_line, sizeof(db_line)); 75 if (i == 0) 76 return (0); /* EOI */ 77 db_lp = db_line; 78 db_endlp = db_lp + i; 79 return (i); 80 } 81 82 void 83 db_flush_line() 84 { 85 db_lp = db_line; 86 db_endlp = db_line; 87 } 88 89 int db_look_char = 0; 90 91 int 92 db_read_char() 93 { 94 int c; 95 96 if (db_look_char != 0) { 97 c = db_look_char; 98 db_look_char = 0; 99 } 100 else if (db_lp >= db_endlp) 101 c = -1; 102 else 103 c = *db_lp++; 104 return (c); 105 } 106 107 void 108 db_unread_char(c) 109 { 110 db_look_char = c; 111 } 112 113 int db_look_token = 0; 114 115 void 116 db_unread_token(t) 117 int t; 118 { 119 db_look_token = t; 120 } 121 122 int 123 db_read_token() 124 { 125 int t; 126 127 if (db_look_token) { 128 t = db_look_token; 129 db_look_token = 0; 130 } 131 else 132 t = db_lex(); 133 return (t); 134 } 135 136 int db_tok_number; 137 char db_tok_string[TOK_STRING_SIZE]; 138 139 int db_radix = 16; 140 141 void 142 db_flush_lex() 143 { 144 db_flush_line(); 145 db_look_char = 0; 146 db_look_token = 0; 147 } 148 149 int 150 db_lex() 151 { 152 int c; 153 154 c = db_read_char(); 155 while (c <= ' ' || c > '~') { 156 if (c == '\n' || c == -1) 157 return (tEOL); 158 c = db_read_char(); 159 } 160 161 if (c >= '0' && c <= '9') { 162 /* number */ 163 int r, digit; 164 165 if (c > '0') 166 r = db_radix; 167 else { 168 c = db_read_char(); 169 if (c == 'O' || c == 'o') 170 r = 8; 171 else if (c == 'T' || c == 't') 172 r = 10; 173 else if (c == 'X' || c == 'x') 174 r = 16; 175 else { 176 r = db_radix; 177 db_unread_char(c); 178 } 179 c = db_read_char(); 180 } 181 db_tok_number = 0; 182 for (;;) { 183 if (c >= '0' && c <= ((r == 8) ? '7' : '9')) 184 digit = c - '0'; 185 else if (r == 16 && ((c >= 'A' && c <= 'F') || 186 (c >= 'a' && c <= 'f'))) { 187 if (c >= 'a') 188 digit = c - 'a' + 10; 189 else if (c >= 'A') 190 digit = c - 'A' + 10; 191 } 192 else 193 break; 194 db_tok_number = db_tok_number * r + digit; 195 c = db_read_char(); 196 } 197 if ((c >= '0' && c <= '9') || 198 (c >= 'A' && c <= 'Z') || 199 (c >= 'a' && c <= 'z') || 200 (c == '_')) 201 { 202 db_error("Bad character in number\n"); 203 db_flush_lex(); 204 return (tEOF); 205 } 206 db_unread_char(c); 207 return (tNUMBER); 208 } 209 if ((c >= 'A' && c <= 'Z') || 210 (c >= 'a' && c <= 'z') || 211 c == '_' || c == '\\') 212 { 213 /* string */ 214 char *cp; 215 216 cp = db_tok_string; 217 if (c == '\\') { 218 c = db_read_char(); 219 if (c == '\n' || c == -1) 220 db_error("Bad escape\n"); 221 } 222 *cp++ = c; 223 while (1) { 224 c = db_read_char(); 225 if ((c >= 'A' && c <= 'Z') || 226 (c >= 'a' && c <= 'z') || 227 (c >= '0' && c <= '9') || 228 c == '_' || c == '\\' || c == ':') 229 { 230 if (c == '\\') { 231 c = db_read_char(); 232 if (c == '\n' || c == -1) 233 db_error("Bad escape\n"); 234 } 235 *cp++ = c; 236 if (cp == db_tok_string+sizeof(db_tok_string)) { 237 db_error("String too long\n"); 238 db_flush_lex(); 239 return (tEOF); 240 } 241 continue; 242 } 243 else { 244 *cp = '\0'; 245 break; 246 } 247 } 248 db_unread_char(c); 249 return (tIDENT); 250 } 251 252 switch (c) { 253 case '+': 254 return (tPLUS); 255 case '-': 256 return (tMINUS); 257 case '.': 258 c = db_read_char(); 259 if (c == '.') 260 return (tDOTDOT); 261 db_unread_char(c); 262 return (tDOT); 263 case '*': 264 return (tSTAR); 265 case '/': 266 return (tSLASH); 267 case '=': 268 return (tEQ); 269 case '%': 270 return (tPCT); 271 case '#': 272 return (tHASH); 273 case '(': 274 return (tLPAREN); 275 case ')': 276 return (tRPAREN); 277 case ',': 278 return (tCOMMA); 279 case '"': 280 return (tDITTO); 281 case '$': 282 return (tDOLLAR); 283 case '!': 284 return (tEXCL); 285 case '<': 286 c = db_read_char(); 287 if (c == '<') 288 return (tSHIFT_L); 289 db_unread_char(c); 290 break; 291 case '>': 292 c = db_read_char(); 293 if (c == '>') 294 return (tSHIFT_R); 295 db_unread_char(c); 296 break; 297 case -1: 298 return (tEOF); 299 } 300 db_printf("Bad character\n"); 301 db_flush_lex(); 302 return (tEOF); 303 } 304