1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980 The Regents of the University of California. 4 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms are permitted 8 * provided that the above copyright notice and this paragraph are 9 * duplicated in all such forms and that any documentation, 10 * advertising materials, and other materials related to such 11 * distribution and use acknowledge that the software was developed 12 * by the University of California, Berkeley, the University of Illinois, 13 * Urbana, and Sun Microsystems, Inc. The name of either University 14 * or Sun Microsystems may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 18 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 19 */ 20 21 #ifndef lint 22 static char sccsid[] = "@(#)lexi.c 5.14 (Berkeley) 03/05/90"; 23 #endif /* not lint */ 24 25 /* 26 * Here we have the token scanner for indent. It scans off one token and puts 27 * it in the global variable "token". It returns a code, indicating the type 28 * of token scanned. 29 */ 30 31 #include "indent_globs.h" 32 #include "indent_codes.h" 33 #include <ctype.h> 34 35 #define alphanum 1 36 #define opchar 3 37 38 struct templ { 39 char *rwd; 40 int rwcode; 41 }; 42 43 struct templ specials[100] = 44 { 45 "switch", 1, 46 "case", 2, 47 "break", 0, 48 "struct", 3, 49 "union", 3, 50 "enum", 3, 51 "default", 2, 52 "int", 4, 53 "char", 4, 54 "float", 4, 55 "double", 4, 56 "long", 4, 57 "short", 4, 58 "typdef", 4, 59 "unsigned", 4, 60 "register", 4, 61 "static", 4, 62 "global", 4, 63 "extern", 4, 64 "void", 4, 65 "goto", 0, 66 "return", 0, 67 "if", 5, 68 "while", 5, 69 "for", 5, 70 "else", 6, 71 "do", 6, 72 "sizeof", 7, 73 0, 0 74 }; 75 76 char chartype[128] = 77 { /* this is used to facilitate the decision of 78 * what type (alphanumeric, operator) each 79 * character is */ 80 0, 0, 0, 0, 0, 0, 0, 0, 81 0, 0, 0, 0, 0, 0, 0, 0, 82 0, 0, 0, 0, 0, 0, 0, 0, 83 0, 0, 0, 0, 0, 0, 0, 0, 84 0, 3, 0, 0, 1, 3, 3, 0, 85 0, 0, 3, 3, 0, 3, 0, 3, 86 1, 1, 1, 1, 1, 1, 1, 1, 87 1, 1, 0, 0, 3, 3, 3, 3, 88 0, 1, 1, 1, 1, 1, 1, 1, 89 1, 1, 1, 1, 1, 1, 1, 1, 90 1, 1, 1, 1, 1, 1, 1, 1, 91 1, 1, 1, 0, 0, 0, 3, 1, 92 0, 1, 1, 1, 1, 1, 1, 1, 93 1, 1, 1, 1, 1, 1, 1, 1, 94 1, 1, 1, 1, 1, 1, 1, 1, 95 1, 1, 1, 0, 3, 0, 3, 0 96 }; 97 98 99 100 101 int 102 lexi() 103 { 104 int unary_delim; /* this is set to 1 if the current token 105 * 106 * forces a following operator to be unary */ 107 static int last_code; /* the last token type returned */ 108 static int l_struct; /* set to 1 if the last token was 'struct' */ 109 int code; /* internal code to be returned */ 110 char qchar; /* the delimiter character for a string */ 111 112 e_token = s_token; /* point to start of place to save token */ 113 unary_delim = false; 114 ps.col_1 = ps.last_nl; /* tell world that this token started in 115 * column 1 iff the last thing scanned was nl */ 116 ps.last_nl = false; 117 118 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 119 ps.col_1 = false; /* leading blanks imply token is not in column 120 * 1 */ 121 if (++buf_ptr >= buf_end) 122 fill_buffer(); 123 } 124 125 /* Scan an alphanumeric token */ 126 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 127 /* 128 * we have a character or number 129 */ 130 register char *j; /* used for searching thru list of 131 * 132 * reserved words */ 133 register struct templ *p; 134 135 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 136 int seendot = 0, 137 seenexp = 0; 138 if (*buf_ptr == '0' && 139 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 140 *e_token++ = *buf_ptr++; 141 *e_token++ = *buf_ptr++; 142 while (isxdigit(*buf_ptr)) { 143 CHECK_SIZE_TOKEN; 144 *e_token++ = *buf_ptr++; 145 } 146 } 147 else 148 while (1) { 149 if (*buf_ptr == '.') 150 if (seendot) 151 break; 152 else 153 seendot++; 154 CHECK_SIZE_TOKEN; 155 *e_token++ = *buf_ptr++; 156 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 157 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 158 break; 159 else { 160 seenexp++; 161 seendot++; 162 CHECK_SIZE_TOKEN; 163 *e_token++ = *buf_ptr++; 164 if (*buf_ptr == '+' || *buf_ptr == '-') 165 *e_token++ = *buf_ptr++; 166 } 167 } 168 if (*buf_ptr == 'L' || *buf_ptr == 'l') 169 *e_token++ = *buf_ptr++; 170 } 171 else 172 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 173 CHECK_SIZE_TOKEN; 174 *e_token++ = *buf_ptr++; 175 if (buf_ptr >= buf_end) 176 fill_buffer(); 177 } 178 *e_token++ = '\0'; 179 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 180 if (++buf_ptr >= buf_end) 181 fill_buffer(); 182 } 183 ps.its_a_keyword = false; 184 ps.sizeof_keyword = false; 185 if (l_struct) { /* if last token was 'struct', then this token 186 * should be treated as a declaration */ 187 l_struct = false; 188 last_code = ident; 189 ps.last_u_d = true; 190 return (decl); 191 } 192 ps.last_u_d = false; /* Operator after indentifier is binary */ 193 last_code = ident; /* Remember that this is the code we will 194 * return */ 195 196 /* 197 * This loop will check if the token is a keyword. 198 */ 199 for (p = specials; (j = p->rwd) != 0; p++) { 200 register char *p = s_token; /* point at scanned token */ 201 if (*j++ != *p++ || *j++ != *p++) 202 continue; /* This test depends on the fact that 203 * identifiers are always at least 1 character 204 * long (ie. the first two bytes of the 205 * identifier are always meaningful) */ 206 if (p[-1] == 0) 207 break; /* If its a one-character identifier */ 208 while (*p++ == *j) 209 if (*j++ == 0) 210 goto found_keyword; /* I wish that C had a multi-level 211 * break... */ 212 } 213 if (p->rwd) { /* we have a keyword */ 214 found_keyword: 215 ps.its_a_keyword = true; 216 ps.last_u_d = true; 217 switch (p->rwcode) { 218 case 1: /* it is a switch */ 219 return (swstmt); 220 case 2: /* a case or default */ 221 return (casestmt); 222 223 case 3: /* a "struct" */ 224 if (ps.p_l_follow) 225 break; /* inside parens: cast */ 226 l_struct = true; 227 228 /* 229 * Next time around, we will want to know that we have had a 230 * 'struct' 231 */ 232 case 4: /* one of the declaration keywords */ 233 if (ps.p_l_follow) { 234 ps.cast_mask |= 1 << ps.p_l_follow; 235 break; /* inside parens: cast */ 236 } 237 last_code = decl; 238 return (decl); 239 240 case 5: /* if, while, for */ 241 return (sp_paren); 242 243 case 6: /* do, else */ 244 return (sp_nparen); 245 246 case 7: 247 ps.sizeof_keyword = true; 248 default: /* all others are treated like any other 249 * identifier */ 250 return (ident); 251 } /* end of switch */ 252 } /* end of if (found_it) */ 253 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 254 register char *tp = buf_ptr; 255 while (tp < buf_end) 256 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 257 goto not_proc; 258 strncpy(ps.procname, token, sizeof ps.procname - 1); 259 ps.in_parameter_declaration = 1; 260 rparen_count = 1; 261 not_proc:; 262 } 263 /* 264 * The following hack attempts to guess whether or not the current 265 * token is in fact a declaration keyword -- one that has been 266 * typedefd 267 */ 268 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 269 && !ps.p_l_follow 270 && !ps.block_init 271 && (ps.last_token == rparen || ps.last_token == semicolon || 272 ps.last_token == decl || 273 ps.last_token == lbrace || ps.last_token == rbrace)) { 274 ps.its_a_keyword = true; 275 ps.last_u_d = true; 276 last_code = decl; 277 return decl; 278 } 279 if (last_code == decl) /* if this is a declared variable, then 280 * following sign is unary */ 281 ps.last_u_d = true; /* will make "int a -1" work */ 282 last_code = ident; 283 return (ident); /* the ident is not in the list */ 284 } /* end of procesing for alpanum character */ 285 286 /* Scan a non-alphanumeric token */ 287 288 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 289 * moved here */ 290 *e_token = '\0'; 291 if (++buf_ptr >= buf_end) 292 fill_buffer(); 293 294 switch (*token) { 295 case '\n': 296 unary_delim = ps.last_u_d; 297 ps.last_nl = true; /* remember that we just had a newline */ 298 code = (had_eof ? 0 : newline); 299 300 /* 301 * if data has been exausted, the newline is a dummy, and we should 302 * return code to stop 303 */ 304 break; 305 306 case '\'': /* start of quoted character */ 307 case '"': /* start of string */ 308 qchar = *token; 309 if (troff) { 310 e_token[-1] = '`'; 311 if (qchar == '"') 312 *e_token++ = '`'; 313 e_token = chfont(&bodyf, &stringf, e_token); 314 } 315 do { /* copy the string */ 316 while (1) { /* move one character or [/<char>]<char> */ 317 if (*buf_ptr == '\n') { 318 printf("%d: Unterminated literal\n", line_no); 319 goto stop_lit; 320 } 321 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 322 * since CHECK_SIZE guarantees that there 323 * are at least 5 entries left */ 324 *e_token = *buf_ptr++; 325 if (buf_ptr >= buf_end) 326 fill_buffer(); 327 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 328 if (*buf_ptr == '\n') /* check for escaped newline */ 329 ++line_no; 330 if (troff) { 331 *++e_token = BACKSLASH; 332 if (*buf_ptr == BACKSLASH) 333 *++e_token = BACKSLASH; 334 } 335 *++e_token = *buf_ptr++; 336 ++e_token; /* we must increment this again because we 337 * copied two chars */ 338 if (buf_ptr >= buf_end) 339 fill_buffer(); 340 } 341 else 342 break; /* we copied one character */ 343 } /* end of while (1) */ 344 } while (*e_token++ != qchar); 345 if (troff) { 346 e_token = chfont(&stringf, &bodyf, e_token - 1); 347 if (qchar == '"') 348 *e_token++ = '\''; 349 } 350 stop_lit: 351 code = ident; 352 break; 353 354 case ('('): 355 case ('['): 356 unary_delim = true; 357 code = lparen; 358 break; 359 360 case (')'): 361 case (']'): 362 code = rparen; 363 break; 364 365 case '#': 366 unary_delim = ps.last_u_d; 367 code = preesc; 368 break; 369 370 case '?': 371 unary_delim = true; 372 code = question; 373 break; 374 375 case (':'): 376 code = colon; 377 unary_delim = true; 378 break; 379 380 case (';'): 381 unary_delim = true; 382 code = semicolon; 383 break; 384 385 case ('{'): 386 unary_delim = true; 387 388 /* 389 * if (ps.in_or_st) ps.block_init = 1; 390 */ 391 /* ? code = ps.block_init ? lparen : lbrace; */ 392 code = lbrace; 393 break; 394 395 case ('}'): 396 unary_delim = true; 397 /* ? code = ps.block_init ? rparen : rbrace; */ 398 code = rbrace; 399 break; 400 401 case 014: /* a form feed */ 402 unary_delim = ps.last_u_d; 403 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 404 * right */ 405 code = form_feed; 406 break; 407 408 case (','): 409 unary_delim = true; 410 code = comma; 411 break; 412 413 case '.': 414 unary_delim = false; 415 code = period; 416 break; 417 418 case '-': 419 case '+': /* check for -, +, --, ++ */ 420 code = (ps.last_u_d ? unary_op : binary_op); 421 unary_delim = true; 422 423 if (*buf_ptr == token[0]) { 424 /* check for doubled character */ 425 *e_token++ = *buf_ptr++; 426 /* buffer overflow will be checked at end of loop */ 427 if (last_code == ident || last_code == rparen) { 428 code = (ps.last_u_d ? unary_op : postop); 429 /* check for following ++ or -- */ 430 unary_delim = false; 431 } 432 } 433 else if (*buf_ptr == '=') 434 /* check for operator += */ 435 *e_token++ = *buf_ptr++; 436 else if (*buf_ptr == '>') { 437 /* check for operator -> */ 438 *e_token++ = *buf_ptr++; 439 if (!pointer_as_binop) { 440 unary_delim = false; 441 code = unary_op; 442 ps.want_blank = false; 443 } 444 } 445 break; /* buffer overflow will be checked at end of 446 * switch */ 447 448 case '=': 449 if (ps.in_or_st) 450 ps.block_init = 1; 451 #ifdef undef 452 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 453 e_token[-1] = *buf_ptr++; 454 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 455 *e_token++ = *buf_ptr++; 456 *e_token++ = '='; /* Flip =+ to += */ 457 *e_token = 0; 458 } 459 #else 460 if (*buf_ptr == '=') {/* == */ 461 *e_token++ = '='; /* Flip =+ to += */ 462 buf_ptr++; 463 *e_token = 0; 464 } 465 #endif 466 code = binary_op; 467 unary_delim = true; 468 break; 469 /* can drop thru!!! */ 470 471 case '>': 472 case '<': 473 case '!': /* ops like <, <<, <=, !=, etc */ 474 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 475 *e_token++ = *buf_ptr; 476 if (++buf_ptr >= buf_end) 477 fill_buffer(); 478 } 479 if (*buf_ptr == '=') 480 *e_token++ = *buf_ptr++; 481 code = (ps.last_u_d ? unary_op : binary_op); 482 unary_delim = true; 483 break; 484 485 default: 486 if (token[0] == '/' && *buf_ptr == '*') { 487 /* it is start of comment */ 488 *e_token++ = '*'; 489 490 if (++buf_ptr >= buf_end) 491 fill_buffer(); 492 493 code = comment; 494 unary_delim = ps.last_u_d; 495 break; 496 } 497 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 498 /* 499 * handle ||, &&, etc, and also things as in int *****i 500 */ 501 *e_token++ = *buf_ptr; 502 if (++buf_ptr >= buf_end) 503 fill_buffer(); 504 } 505 code = (ps.last_u_d ? unary_op : binary_op); 506 unary_delim = true; 507 508 509 } /* end of switch */ 510 if (code != newline) { 511 l_struct = false; 512 last_code = code; 513 } 514 if (buf_ptr >= buf_end) /* check for input buffer empty */ 515 fill_buffer(); 516 ps.last_u_d = unary_delim; 517 *e_token = '\0'; /* null terminate the token */ 518 return (code); 519 } 520 521 /* 522 * Add the given keyword to the keyword table, using val as the keyword type 523 */ 524 addkey(key, val) 525 char *key; 526 { 527 register struct templ *p = specials; 528 while (p->rwd) 529 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 530 return; 531 else 532 p++; 533 if (p >= specials + sizeof specials / sizeof specials[0]) 534 return; /* For now, table overflows are silently 535 * ignored */ 536 p->rwd = key; 537 p->rwcode = val; 538 p[1].rwd = 0; 539 p[1].rwcode = 0; 540 return; 541 } 542