1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980 The Regents of the University of California. 4 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #ifndef lint 37 /*static char sccsid[] = "from: @(#)lexi.c 5.16 (Berkeley) 2/26/91";*/ 38 static char rcsid[] = "$Id: lexi.c,v 1.2 1993/08/01 18:14:31 mycroft Exp $"; 39 #endif /* not lint */ 40 41 /* 42 * Here we have the token scanner for indent. It scans off one token and puts 43 * it in the global variable "token". It returns a code, indicating the type 44 * of token scanned. 45 */ 46 47 #include <stdio.h> 48 #include <ctype.h> 49 #include <stdlib.h> 50 #include <string.h> 51 #include "indent_globs.h" 52 #include "indent_codes.h" 53 54 #define alphanum 1 55 #define opchar 3 56 57 struct templ { 58 char *rwd; 59 int rwcode; 60 }; 61 62 struct templ specials[100] = 63 { 64 "switch", 1, 65 "case", 2, 66 "break", 0, 67 "struct", 3, 68 "union", 3, 69 "enum", 3, 70 "default", 2, 71 "int", 4, 72 "char", 4, 73 "float", 4, 74 "double", 4, 75 "long", 4, 76 "short", 4, 77 "typdef", 4, 78 "unsigned", 4, 79 "register", 4, 80 "static", 4, 81 "global", 4, 82 "extern", 4, 83 "void", 4, 84 "goto", 0, 85 "return", 0, 86 "if", 5, 87 "while", 5, 88 "for", 5, 89 "else", 6, 90 "do", 6, 91 "sizeof", 7, 92 0, 0 93 }; 94 95 char chartype[128] = 96 { /* this is used to facilitate the decision of 97 * what type (alphanumeric, operator) each 98 * character is */ 99 0, 0, 0, 0, 0, 0, 0, 0, 100 0, 0, 0, 0, 0, 0, 0, 0, 101 0, 0, 0, 0, 0, 0, 0, 0, 102 0, 0, 0, 0, 0, 0, 0, 0, 103 0, 3, 0, 0, 1, 3, 3, 0, 104 0, 0, 3, 3, 0, 3, 0, 3, 105 1, 1, 1, 1, 1, 1, 1, 1, 106 1, 1, 0, 0, 3, 3, 3, 3, 107 0, 1, 1, 1, 1, 1, 1, 1, 108 1, 1, 1, 1, 1, 1, 1, 1, 109 1, 1, 1, 1, 1, 1, 1, 1, 110 1, 1, 1, 0, 0, 0, 3, 1, 111 0, 1, 1, 1, 1, 1, 1, 1, 112 1, 1, 1, 1, 1, 1, 1, 1, 113 1, 1, 1, 1, 1, 1, 1, 1, 114 1, 1, 1, 0, 3, 0, 3, 0 115 }; 116 117 118 119 120 int 121 lexi() 122 { 123 int unary_delim; /* this is set to 1 if the current token 124 * 125 * forces a following operator to be unary */ 126 static int last_code; /* the last token type returned */ 127 static int l_struct; /* set to 1 if the last token was 'struct' */ 128 int code; /* internal code to be returned */ 129 char qchar; /* the delimiter character for a string */ 130 131 e_token = s_token; /* point to start of place to save token */ 132 unary_delim = false; 133 ps.col_1 = ps.last_nl; /* tell world that this token started in 134 * column 1 iff the last thing scanned was nl */ 135 ps.last_nl = false; 136 137 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 138 ps.col_1 = false; /* leading blanks imply token is not in column 139 * 1 */ 140 if (++buf_ptr >= buf_end) 141 fill_buffer(); 142 } 143 144 /* Scan an alphanumeric token */ 145 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 146 /* 147 * we have a character or number 148 */ 149 register char *j; /* used for searching thru list of 150 * 151 * reserved words */ 152 register struct templ *p; 153 154 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 155 int seendot = 0, 156 seenexp = 0; 157 if (*buf_ptr == '0' && 158 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 159 *e_token++ = *buf_ptr++; 160 *e_token++ = *buf_ptr++; 161 while (isxdigit(*buf_ptr)) { 162 CHECK_SIZE_TOKEN; 163 *e_token++ = *buf_ptr++; 164 } 165 } 166 else 167 while (1) { 168 if (*buf_ptr == '.') 169 if (seendot) 170 break; 171 else 172 seendot++; 173 CHECK_SIZE_TOKEN; 174 *e_token++ = *buf_ptr++; 175 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 176 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 177 break; 178 else { 179 seenexp++; 180 seendot++; 181 CHECK_SIZE_TOKEN; 182 *e_token++ = *buf_ptr++; 183 if (*buf_ptr == '+' || *buf_ptr == '-') 184 *e_token++ = *buf_ptr++; 185 } 186 } 187 if (*buf_ptr == 'L' || *buf_ptr == 'l') 188 *e_token++ = *buf_ptr++; 189 } 190 else 191 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 192 CHECK_SIZE_TOKEN; 193 *e_token++ = *buf_ptr++; 194 if (buf_ptr >= buf_end) 195 fill_buffer(); 196 } 197 *e_token++ = '\0'; 198 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 199 if (++buf_ptr >= buf_end) 200 fill_buffer(); 201 } 202 ps.its_a_keyword = false; 203 ps.sizeof_keyword = false; 204 if (l_struct) { /* if last token was 'struct', then this token 205 * should be treated as a declaration */ 206 l_struct = false; 207 last_code = ident; 208 ps.last_u_d = true; 209 return (decl); 210 } 211 ps.last_u_d = false; /* Operator after indentifier is binary */ 212 last_code = ident; /* Remember that this is the code we will 213 * return */ 214 215 /* 216 * This loop will check if the token is a keyword. 217 */ 218 for (p = specials; (j = p->rwd) != 0; p++) { 219 register char *p = s_token; /* point at scanned token */ 220 if (*j++ != *p++ || *j++ != *p++) 221 continue; /* This test depends on the fact that 222 * identifiers are always at least 1 character 223 * long (ie. the first two bytes of the 224 * identifier are always meaningful) */ 225 if (p[-1] == 0) 226 break; /* If its a one-character identifier */ 227 while (*p++ == *j) 228 if (*j++ == 0) 229 goto found_keyword; /* I wish that C had a multi-level 230 * break... */ 231 } 232 if (p->rwd) { /* we have a keyword */ 233 found_keyword: 234 ps.its_a_keyword = true; 235 ps.last_u_d = true; 236 switch (p->rwcode) { 237 case 1: /* it is a switch */ 238 return (swstmt); 239 case 2: /* a case or default */ 240 return (casestmt); 241 242 case 3: /* a "struct" */ 243 if (ps.p_l_follow) 244 break; /* inside parens: cast */ 245 l_struct = true; 246 247 /* 248 * Next time around, we will want to know that we have had a 249 * 'struct' 250 */ 251 case 4: /* one of the declaration keywords */ 252 if (ps.p_l_follow) { 253 ps.cast_mask |= 1 << ps.p_l_follow; 254 break; /* inside parens: cast */ 255 } 256 last_code = decl; 257 return (decl); 258 259 case 5: /* if, while, for */ 260 return (sp_paren); 261 262 case 6: /* do, else */ 263 return (sp_nparen); 264 265 case 7: 266 ps.sizeof_keyword = true; 267 default: /* all others are treated like any other 268 * identifier */ 269 return (ident); 270 } /* end of switch */ 271 } /* end of if (found_it) */ 272 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 273 register char *tp = buf_ptr; 274 while (tp < buf_end) 275 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 276 goto not_proc; 277 strncpy(ps.procname, token, sizeof ps.procname - 1); 278 ps.in_parameter_declaration = 1; 279 rparen_count = 1; 280 not_proc:; 281 } 282 /* 283 * The following hack attempts to guess whether or not the current 284 * token is in fact a declaration keyword -- one that has been 285 * typedefd 286 */ 287 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 288 && !ps.p_l_follow 289 && !ps.block_init 290 && (ps.last_token == rparen || ps.last_token == semicolon || 291 ps.last_token == decl || 292 ps.last_token == lbrace || ps.last_token == rbrace)) { 293 ps.its_a_keyword = true; 294 ps.last_u_d = true; 295 last_code = decl; 296 return decl; 297 } 298 if (last_code == decl) /* if this is a declared variable, then 299 * following sign is unary */ 300 ps.last_u_d = true; /* will make "int a -1" work */ 301 last_code = ident; 302 return (ident); /* the ident is not in the list */ 303 } /* end of procesing for alpanum character */ 304 305 /* Scan a non-alphanumeric token */ 306 307 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 308 * moved here */ 309 *e_token = '\0'; 310 if (++buf_ptr >= buf_end) 311 fill_buffer(); 312 313 switch (*token) { 314 case '\n': 315 unary_delim = ps.last_u_d; 316 ps.last_nl = true; /* remember that we just had a newline */ 317 code = (had_eof ? 0 : newline); 318 319 /* 320 * if data has been exausted, the newline is a dummy, and we should 321 * return code to stop 322 */ 323 break; 324 325 case '\'': /* start of quoted character */ 326 case '"': /* start of string */ 327 qchar = *token; 328 if (troff) { 329 e_token[-1] = '`'; 330 if (qchar == '"') 331 *e_token++ = '`'; 332 e_token = chfont(&bodyf, &stringf, e_token); 333 } 334 do { /* copy the string */ 335 while (1) { /* move one character or [/<char>]<char> */ 336 if (*buf_ptr == '\n') { 337 printf("%d: Unterminated literal\n", line_no); 338 goto stop_lit; 339 } 340 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 341 * since CHECK_SIZE guarantees that there 342 * are at least 5 entries left */ 343 *e_token = *buf_ptr++; 344 if (buf_ptr >= buf_end) 345 fill_buffer(); 346 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 347 if (*buf_ptr == '\n') /* check for escaped newline */ 348 ++line_no; 349 if (troff) { 350 *++e_token = BACKSLASH; 351 if (*buf_ptr == BACKSLASH) 352 *++e_token = BACKSLASH; 353 } 354 *++e_token = *buf_ptr++; 355 ++e_token; /* we must increment this again because we 356 * copied two chars */ 357 if (buf_ptr >= buf_end) 358 fill_buffer(); 359 } 360 else 361 break; /* we copied one character */ 362 } /* end of while (1) */ 363 } while (*e_token++ != qchar); 364 if (troff) { 365 e_token = chfont(&stringf, &bodyf, e_token - 1); 366 if (qchar == '"') 367 *e_token++ = '\''; 368 } 369 stop_lit: 370 code = ident; 371 break; 372 373 case ('('): 374 case ('['): 375 unary_delim = true; 376 code = lparen; 377 break; 378 379 case (')'): 380 case (']'): 381 code = rparen; 382 break; 383 384 case '#': 385 unary_delim = ps.last_u_d; 386 code = preesc; 387 break; 388 389 case '?': 390 unary_delim = true; 391 code = question; 392 break; 393 394 case (':'): 395 code = colon; 396 unary_delim = true; 397 break; 398 399 case (';'): 400 unary_delim = true; 401 code = semicolon; 402 break; 403 404 case ('{'): 405 unary_delim = true; 406 407 /* 408 * if (ps.in_or_st) ps.block_init = 1; 409 */ 410 /* ? code = ps.block_init ? lparen : lbrace; */ 411 code = lbrace; 412 break; 413 414 case ('}'): 415 unary_delim = true; 416 /* ? code = ps.block_init ? rparen : rbrace; */ 417 code = rbrace; 418 break; 419 420 case 014: /* a form feed */ 421 unary_delim = ps.last_u_d; 422 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 423 * right */ 424 code = form_feed; 425 break; 426 427 case (','): 428 unary_delim = true; 429 code = comma; 430 break; 431 432 case '.': 433 unary_delim = false; 434 code = period; 435 break; 436 437 case '-': 438 case '+': /* check for -, +, --, ++ */ 439 code = (ps.last_u_d ? unary_op : binary_op); 440 unary_delim = true; 441 442 if (*buf_ptr == token[0]) { 443 /* check for doubled character */ 444 *e_token++ = *buf_ptr++; 445 /* buffer overflow will be checked at end of loop */ 446 if (last_code == ident || last_code == rparen) { 447 code = (ps.last_u_d ? unary_op : postop); 448 /* check for following ++ or -- */ 449 unary_delim = false; 450 } 451 } 452 else if (*buf_ptr == '=') 453 /* check for operator += */ 454 *e_token++ = *buf_ptr++; 455 else if (*buf_ptr == '>') { 456 /* check for operator -> */ 457 *e_token++ = *buf_ptr++; 458 if (!pointer_as_binop) { 459 unary_delim = false; 460 code = unary_op; 461 ps.want_blank = false; 462 } 463 } 464 break; /* buffer overflow will be checked at end of 465 * switch */ 466 467 case '=': 468 if (ps.in_or_st) 469 ps.block_init = 1; 470 #ifdef undef 471 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 472 e_token[-1] = *buf_ptr++; 473 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 474 *e_token++ = *buf_ptr++; 475 *e_token++ = '='; /* Flip =+ to += */ 476 *e_token = 0; 477 } 478 #else 479 if (*buf_ptr == '=') {/* == */ 480 *e_token++ = '='; /* Flip =+ to += */ 481 buf_ptr++; 482 *e_token = 0; 483 } 484 #endif 485 code = binary_op; 486 unary_delim = true; 487 break; 488 /* can drop thru!!! */ 489 490 case '>': 491 case '<': 492 case '!': /* ops like <, <<, <=, !=, etc */ 493 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 494 *e_token++ = *buf_ptr; 495 if (++buf_ptr >= buf_end) 496 fill_buffer(); 497 } 498 if (*buf_ptr == '=') 499 *e_token++ = *buf_ptr++; 500 code = (ps.last_u_d ? unary_op : binary_op); 501 unary_delim = true; 502 break; 503 504 default: 505 if (token[0] == '/' && *buf_ptr == '*') { 506 /* it is start of comment */ 507 *e_token++ = '*'; 508 509 if (++buf_ptr >= buf_end) 510 fill_buffer(); 511 512 code = comment; 513 unary_delim = ps.last_u_d; 514 break; 515 } 516 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 517 /* 518 * handle ||, &&, etc, and also things as in int *****i 519 */ 520 *e_token++ = *buf_ptr; 521 if (++buf_ptr >= buf_end) 522 fill_buffer(); 523 } 524 code = (ps.last_u_d ? unary_op : binary_op); 525 unary_delim = true; 526 527 528 } /* end of switch */ 529 if (code != newline) { 530 l_struct = false; 531 last_code = code; 532 } 533 if (buf_ptr >= buf_end) /* check for input buffer empty */ 534 fill_buffer(); 535 ps.last_u_d = unary_delim; 536 *e_token = '\0'; /* null terminate the token */ 537 return (code); 538 } 539 540 /* 541 * Add the given keyword to the keyword table, using val as the keyword type 542 */ 543 addkey(key, val) 544 char *key; 545 { 546 register struct templ *p = specials; 547 while (p->rwd) 548 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 549 return; 550 else 551 p++; 552 if (p >= specials + sizeof specials / sizeof specials[0]) 553 return; /* For now, table overflows are silently 554 * ignored */ 555 p->rwd = key; 556 p->rwcode = val; 557 p[1].rwd = 0; 558 p[1].rwcode = 0; 559 return; 560 } 561